Coverage Report

Created: 2025-05-09 19:15

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_cnnp_model_addons.c
  Count| Source
       | #include "ccv_nnc.h"
       | #include "ccv_nnc_easy.h"
       | #include "ccv_nnc_internal.h"
       | #include "ccv_internal.h"
       | #include "_ccv_cnnp_model.h"
       |
       | // MARK - Add-on Functions
       |
       | static int _ccv_cnnp_model_clip_grad_norm_reduce_norm2(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
      2| {
      2|   const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[0]->info.type);
      2|   ccv_nnc_tensor_t* const old_norm2 = outputs[1 + device_id * 2];
      2|   ccv_nnc_tensor_t* const norm2 = outputs[1 + device_id * 2 + 1];
      2|   const int tensor_count = ccv_nnc_tensor_count(inputs[0]->info);
      2|   if (tensor_count == 1)
      2|     ccv_nnc_cmd_exec(CMD_MUL_FORWARD(1), hint, flags, TENSOR_LIST(inputs[0], inputs[0]), TENSOR_LIST(norm2), stream_context);
      0|   else {
      0|     ccv_nnc_cmd_exec(CMD_REDUCE_NORM2_FORWARD(), hint, flags, TENSOR_LIST(inputs[0]), TENSOR_LIST(norm2), stream_context);
      0|     ccv_nnc_cmd_exec(CMD_MUL_FORWARD(1), hint, flags, TENSOR_LIST(norm2, norm2), TENSOR_LIST(norm2), stream_context);
      0|   }
      2|   ccv_nnc_cmd_exec(CMD_ADD_FORWARD(1, 1), hint, flags, TENSOR_LIST(old_norm2, norm2), TENSOR_LIST(old_norm2), stream_context);
      2|   return CCV_NNC_EXEC_SUCCESS;
      2| }
       |
       | static ccv_nnc_cmd_vtab_t clip_grad_norm_reduce_norm2_vtab = {
       |   .exec = _ccv_cnnp_model_clip_grad_norm_reduce_norm2
       | };
       |
       | static int _ccv_cnnp_model_clip_grad_norm_scatter_norm2(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
      2| {
      2|   const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[0]->info.type);
      2|   ccv_nnc_tensor_t* const norm2 = inputs[1 + device_id * 2];
      2|   ccv_nnc_cmd_exec(CMD_MUL_FORWARD(1), hint, flags, TENSOR_LIST(inputs[0], norm2), TENSOR_LIST(outputs[0]), stream_context);
      2|   return CCV_NNC_EXEC_SUCCESS;
      2| }
       |
       | static ccv_nnc_cmd_vtab_t clip_grad_norm_scatter_norm2_vtab = {
       |   .exec = _ccv_cnnp_model_clip_grad_norm_scatter_norm2
       | };
       |
       | void ccv_cnnp_model_parameters_clip_grad_norm(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, int norm_type, float max_norm, ccv_nnc_stream_context_t* const stream_context)
      2| {
      2|   assert(norm_type == 2);
      2|   ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
      2|   assert(compiled_data);
      2|   const int parallel_count = ccv_max(model->parallel_count, 1);
      2|   ccv_nnc_tensor_t* norm2[parallel_count * 2];
      2|   ccv_nnc_tensor_t* max_normt[parallel_count];
      2|   const int stream_type = model->compiled_data->stream_type;
      2|   int i;
      2|   if (stream_type == CCV_STREAM_CONTEXT_GPU)
      0|   {
      0|     for (i = 0; i < parallel_count; i++)
      0|     {
      0|       ccv_nnc_tensor_param_t info = {
      0|         .type = CCV_TENSOR_GPU_MEMORY,
      0|         .format = CCV_TENSOR_FORMAT_NHWC,
      0|         .datatype = CCV_32F,
      0|         .dim = {1},
      0|       };
      0|       CCV_TENSOR_SET_DEVICE_ID(info.type, i);
      0|       norm2[i * 2] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
      0|       norm2[i * 2 + 1] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
      0|       max_normt[i] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
      0|     }
      2|   } else {
      4|     for (i = 0; i < parallel_count; i++)
      2|     {
      2|       ccv_nnc_tensor_param_t info = {
      2|         .type = CCV_TENSOR_CPU_MEMORY,
      2|         .format = CCV_TENSOR_FORMAT_NHWC,
      2|         .datatype = CCV_32F,
      2|         .dim = {1},
      2|       };
      2|       norm2[i * 2] = ccv_nnc_tensor_new(0, info, 0);
      2|       norm2[i * 2 + 1] = ccv_nnc_tensor_new(0, info, 0);
      2|       max_normt[i] = ccv_nnc_tensor_new(0, info, 0);
      2|     }
      2|   }
       |   // zero out old norm2.
      2|   if (parallel_count > 1)
      0|   {
      0|     ccv_nnc_stream_context_t* streams[parallel_count];
      0|     ccv_nnc_stream_signal_t* signal;
      0|     if (stream_context)
      0|       signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
      0|     for (i = 0; i < parallel_count; i++)
      0|     {
      0|       const int stream_type = CCV_TENSOR_GET_MEMORY(norm2[i * 2]->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
      0|       const int device_id = CCV_TENSOR_GET_DEVICE_ID(norm2[i * 2]->info.type);
      0|       int type = stream_type;
      0|       CCV_STREAM_SET_DEVICE_ID(type, device_id);
      0|       ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
       |       // Wait signal to finish.
      0|       if (stream_context)
      0|         ccv_nnc_stream_context_wait_signal(stream_0, signal);
      0|       ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(norm2[i * 2]), stream_0);
      0|       if (stream_context)
      0|       {
      0|         ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
      0|         ccv_nnc_stream_context_wait_signal(stream_context, signal);
      0|       }
      0|       streams[i] = stream_0;
      0|     }
       |     // If this should be blocking, blocking it.
      0|     if (!stream_context)
      0|       for (i = 0; i < parallel_count; i++)
      0|         if (streams[i])
      0|           ccv_nnc_stream_context_wait(streams[i]);
      2|   } else {
      2|     ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(norm2[0]), stream_context);
      2|   }
       |   // Gather norm2.
      2|   ccv_nnc_cmd_t reduce_cmd = {
      2|     .cmd = CCV_NNC_CUSTOM_FORWARD,
      2|     .isa = &clip_grad_norm_reduce_norm2_vtab,
      2|   };
      2|   ccv_cnnp_model_parameter_gradients_map(model, parameters, reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, norm2, parallel_count * 2, stream_context);
       |   // Now compute max(max_norm / norm2, 1.0).
      2|   if (parallel_count > 1)
      0|   {
      0|     ccv_nnc_stream_context_t* streams[parallel_count];
      0|     ccv_nnc_stream_signal_t* signal;
      0|     if (stream_context)
      0|       signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
      0|     for (i = 0; i < parallel_count; i++)
      0|     {
      0|       const int stream_type = CCV_TENSOR_GET_MEMORY(norm2[i * 2]->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
      0|       const int device_id = CCV_TENSOR_GET_DEVICE_ID(norm2[i * 2]->info.type);
      0|       int type = stream_type;
      0|       CCV_STREAM_SET_DEVICE_ID(type, device_id);
      0|       ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
       |       // Wait signal to finish.
      0|       if (stream_context)
      0|         ccv_nnc_stream_context_wait_signal(stream_0, signal);
      0|       ccv_nnc_cmd_exec(CMD_EWSQRT_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(norm2[i * 2]), TENSOR_LIST(norm2[i * 2]), stream_0);
      0|       ccv_nnc_cmd_exec(CMD_SET_FORWARD(max_norm), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(max_normt[i]), stream_0);
      0|       ccv_nnc_cmd_exec(CMD_EWDIV_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(max_normt[i], norm2[i * 2]), TENSOR_LIST(norm2[i * 2]), stream_0);
      0|       ccv_nnc_cmd_exec(CMD_CLAMP_FORWARD(NAN, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(norm2[i * 2]), TENSOR_LIST(norm2[i * 2]), stream_0);
      0|       if (stream_context)
      0|       {
      0|         ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
      0|         ccv_nnc_stream_context_wait_signal(stream_context, signal);
      0|       }
      0|       streams[i] = stream_0;
      0|     }
       |     // If this should be blocking, blocking it.
      0|     if (!stream_context)
      0|       for (i = 0; i < parallel_count; i++)
      0|         if (streams[i])
      0|           ccv_nnc_stream_context_wait(streams[i]);
      2|   } else {
      2|     ccv_nnc_cmd_exec(CMD_EWSQRT_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(norm2[0]), TENSOR_LIST(norm2[0]), stream_context);
      2|     ccv_nnc_cmd_exec(CMD_SET_FORWARD(max_norm), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(max_normt[0]), stream_context);
      2|     ccv_nnc_cmd_exec(CMD_EWDIV_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(max_normt[0], norm2[0]), TENSOR_LIST(norm2[0]), stream_context);
      2|     ccv_nnc_cmd_exec(CMD_CLAMP_FORWARD(NAN, 1), ccv_nnc_no_hint, 0, TENSOR_LIST(norm2[0]), TENSOR_LIST(norm2[0]), stream_context);
      2|   }
      2|   ccv_nnc_cmd_t scatter_cmd = {
      2|     .cmd = CCV_NNC_CUSTOM_FORWARD,
      2|     .isa = &clip_grad_norm_scatter_norm2_vtab,
      2|   };
      2|   ccv_cnnp_model_parameter_gradients_map(model, parameters, scatter_cmd, ccv_nnc_no_hint, 0, norm2, parallel_count * 2, 0, 0, stream_context);
      2|   if (stream_type == CCV_STREAM_CONTEXT_GPU)
      0|     for (i = 0; i < parallel_count; i++)
      0|     {
      0|       ccv_nnc_xpu_free(&compiled_data->xpu_alloc, norm2[i * 2]->data.u8);
      0|       ccv_nnc_xpu_free(&compiled_data->xpu_alloc, norm2[i * 2 + 1]->data.u8);
      0|       ccv_nnc_xpu_free(&compiled_data->xpu_alloc, max_normt[i]->data.u8);
      0|     }
      4|   for (i = 0; i < parallel_count; i++)
      2|   {
      2|     ccv_nnc_tensor_free(norm2[i * 2]);
      2|     ccv_nnc_tensor_free(norm2[i * 2 + 1]);
      2|     ccv_nnc_tensor_free(max_normt[i]);
      2|   }
      2| }
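For context, a minimal calling sketch, not part of the measured file; it assumes a compiled model `model` with freshly computed gradients, and the `ccv_cnnp_model_parameters`/`ALL_PARAMETERS` selector from the ccv_nnc API:

    // Clip every parameter gradient so the global L2 norm is at most 5.0.
    // norm_type must be 2 (the only value the assert above accepts);
    // pass 0 as the stream context to run synchronously.
    ccv_cnnp_model_parameters_clip_grad_norm(model,
      ccv_cnnp_model_parameters(model, ALL_PARAMETERS, ALL_PARAMETERS), 2, 5.0, 0);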
       |
       | // MARK - Add-on Functions
       |
       | static int _ccv_cnnp_model_isnan(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
      0| {
      0|   const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[0]->info.type);
      0|   ccv_nnc_tensor_t* const old_isnanr = outputs[1 + device_id * 2];
      0|   ccv_nnc_tensor_t* const isnanr = outputs[1 + device_id * 2 + 1];
      0|   ccv_nnc_cmd_t reduce_cmd = CMD_REDUCE_ISNAN_FORWARD();
      0|   reduce_cmd.info.reduce.count = ccv_nnc_tensor_nd(inputs[0]->info.dim);
      0|   int i;
      0|   for (i = 0; i < cmd.info.reduce.count; i++)
      0|     reduce_cmd.info.reduce.axis[i] = i;
      0|   ccv_nnc_cmd_exec(reduce_cmd, hint, flags, TENSOR_LIST(inputs[0]), TENSOR_LIST(isnanr), stream_context);
      0|   ccv_nnc_cmd_exec(CMD_EWSUM_FORWARD(), hint, flags, TENSOR_LIST(old_isnanr, isnanr), TENSOR_LIST(old_isnanr), stream_context);
      0|   return CCV_NNC_EXEC_SUCCESS;
      0| }
       |
       | static ccv_nnc_cmd_vtab_t reduce_isnan_vtab = {
       |   .exec = _ccv_cnnp_model_isnan
       | };
       |
       | int ccv_cnnp_model_parameter_gradients_isnan(ccv_cnnp_model_t* const model, const ccv_cnnp_model_io_t parameters, ccv_nnc_stream_context_t* const stream_context)
      0| {
      0|   ccv_cnnp_compiled_data_t* const compiled_data = model->compiled_data;
      0|   assert(compiled_data);
      0|   const int parallel_count = ccv_max(model->parallel_count, 1);
      0|   ccv_nnc_tensor_t* isnanr[parallel_count * 2];
      0|   const int stream_type = model->compiled_data->stream_type;
      0|   int i;
      0|   if (stream_type == CCV_STREAM_CONTEXT_GPU)
      0|   {
      0|     for (i = 0; i < parallel_count; i++)
      0|     {
      0|       ccv_nnc_tensor_param_t info = {
      0|         .type = CCV_TENSOR_GPU_MEMORY,
      0|         .format = CCV_TENSOR_FORMAT_NHWC,
      0|         .datatype = CCV_32S,
      0|         .dim = {1},
      0|       };
      0|       CCV_TENSOR_SET_DEVICE_ID(info.type, i);
      0|       isnanr[i * 2] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
      0|       isnanr[i * 2 + 1] = ccv_nnc_tensor_new(ccv_nnc_xpu_alloc(&compiled_data->xpu_alloc, i, stream_context, ccv_nnc_tensor_data_size(info)), info, 0);
      0|     }
      0|   } else {
      0|     for (i = 0; i < parallel_count; i++)
      0|     {
      0|       ccv_nnc_tensor_param_t info = {
      0|         .type = CCV_TENSOR_CPU_MEMORY,
      0|         .format = CCV_TENSOR_FORMAT_NHWC,
      0|         .datatype = CCV_32S,
      0|         .dim = {1},
      0|       };
      0|       isnanr[i * 2] = ccv_nnc_tensor_new(0, info, 0);
      0|       isnanr[i * 2 + 1] = ccv_nnc_tensor_new(0, info, 0);
      0|     }
      0|   }
       |   // zero out old isnanr.
      0|   if (parallel_count > 1)
      0|   {
      0|     ccv_nnc_stream_context_t* streams[parallel_count];
      0|     ccv_nnc_stream_signal_t* signal;
      0|     if (stream_context)
      0|       signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
      0|     for (i = 0; i < parallel_count; i++)
      0|     {
      0|       const int stream_type = CCV_TENSOR_GET_MEMORY(isnanr[i * 2]->info.type) == CCV_TENSOR_GPU_MEMORY ? CCV_STREAM_CONTEXT_GPU : CCV_STREAM_CONTEXT_CPU;
      0|       const int device_id = CCV_TENSOR_GET_DEVICE_ID(isnanr[i * 2]->info.type);
      0|       int type = stream_type;
      0|       CCV_STREAM_SET_DEVICE_ID(type, device_id);
      0|       ccv_nnc_stream_context_t* const stream_0 = ccv_cnnp_compiled_data_get_stream(compiled_data, type);
       |       // Wait signal to finish.
      0|       if (stream_context)
      0|         ccv_nnc_stream_context_wait_signal(stream_0, signal);
      0|       ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(isnanr[i * 2]), stream_0);
      0|       if (stream_context)
      0|       {
      0|         ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_0);
      0|         ccv_nnc_stream_context_wait_signal(stream_context, signal);
      0|       }
      0|       streams[i] = stream_0;
      0|     }
       |     // If this should be blocking, blocking it.
      0|     if (!stream_context)
      0|       for (i = 0; i < parallel_count; i++)
      0|         if (streams[i])
      0|           ccv_nnc_stream_context_wait(streams[i]);
      0|   } else
      0|     ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(isnanr[0]), stream_context);
       |   // Gather isnanr.
      0|   ccv_nnc_cmd_t reduce_cmd = {
      0|     .cmd = CCV_NNC_CUSTOM_FORWARD,
      0|     .isa = &reduce_isnan_vtab,
      0|   };
      0|   ccv_cnnp_model_parameter_gradients_map(model, parameters, reduce_cmd, ccv_nnc_no_hint, 0, 0, 0, isnanr, parallel_count * 2, stream_context);
      0|   for (i = 0; i < parallel_count; i++)
      0|     ccv_nnc_tensor_free(isnanr[i * 2 + 1]);
      0|   int retval = 0;
      0|   if (stream_type == CCV_TENSOR_GPU_MEMORY)
      0|   {
      0|     ccv_nnc_tensor_param_t info = {
      0|       .type = CCV_TENSOR_CPU_MEMORY,
      0|       .format = CCV_TENSOR_FORMAT_NHWC,
      0|       .datatype = CCV_32S,
      0|       .dim = {1},
      0|     };
      0|     ccv_nnc_tensor_t* checknan = ccv_nnc_tensor_new(0, info, 0);
      0|     for (i = 0; i < parallel_count; i++)
      0|     {
      0|       ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(isnanr[i * 2]), TENSOR_LIST(checknan), 0);
      0|       if (checknan->data.i32[0] > 0)
      0|       {
      0|         retval = 1;
      0|         break;
      0|       }
      0|     }
      0|     ccv_nnc_tensor_free(checknan);
      0|   } else {
      0|     for (i = 0; i < parallel_count; i++)
      0|       if (isnanr[i * 2]->data.i32[0] > 0)
      0|       {
      0|         retval = 1;
      0|         break;
      0|       }
      0|   }
      0|   for (i = 0; i < parallel_count; i++)
      0|     ccv_nnc_tensor_free(isnanr[i * 2]);
      0|   return retval;
      0| }
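A similarly hedged sketch of the NaN guard, e.g. to skip an optimizer step on a bad batch; `model` and the parameter selector are the same assumptions as above:

    // Returns 1 if any parameter gradient contains NaN, 0 otherwise.
    const int has_nan = ccv_cnnp_model_parameter_gradients_isnan(model,
      ccv_cnnp_model_parameters(model, ALL_PARAMETERS, ALL_PARAMETERS), 0);
    if (has_nan)
      ; // e.g. zero or discard the gradients instead of applying them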
       |
       | // MARK - Core Layers
       |
       | static void _ccv_cnnp_sum_build(ccv_cnnp_model_t* const self, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
     64| {
     64|   PRINT(CCV_CLI_VERBOSE, "[cnnp_sum_build] -\n");
     64|   assert(output_size == 1);
     64|   outputs[0] = ccv_nnc_tensor_symbol_new(graph, ccv_nnc_tensor_symbol_params(graph, inputs[0]), 0);
     64|   ccv_nnc_graph_exec_symbol_new(graph, CMD_EWSUM_FORWARD(), inputs, input_size, outputs, output_size, 0);
     64| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_sum_copy(const ccv_cnnp_model_t* const self, void* const context);
       |
       | static const ccv_cnnp_model_vtab_t ccv_cnnp_sum_isa = {
       |   .build = _ccv_cnnp_sum_build,
       |   .copy = _ccv_cnnp_sum_copy,
       | };
       |
       | typedef struct {
       |   ccv_cnnp_model_t super;
       |   ccv_nnc_tensor_symbol_t output;
       | } ccv_cnnp_model_sum_t;
       |
       | ccv_cnnp_model_t* ccv_cnnp_sum(const char* const name)
     63| {
     63|   ccv_cnnp_model_sum_t* const model_sum = (ccv_cnnp_model_sum_t*)cccalloc(1, sizeof(ccv_cnnp_model_sum_t));
     63|   model_sum->super.isa = &ccv_cnnp_sum_isa;
     63|   model_sum->super.input_size = 0;
     63|   model_sum->super.outputs = &model_sum->output;
     63|   model_sum->super.output_size = 1;
     63|   ccv_cnnp_model_copy_name(&model_sum->super, name);
     63|   return (ccv_cnnp_model_t*)model_sum;
     63| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_sum_copy(const ccv_cnnp_model_t* const self, void* const context)
      3| {
      3|   return ccv_cnnp_sum(self->name);
      3| }
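For reference, a sketch of composing the sum model, assuming two upstream outputs `x0` and `x1` of type ccv_cnnp_model_io_t and the usual `ccv_cnnp_model_apply`/`MODEL_IO_LIST` helpers from the ccv_nnc API:

    // One output, computed by CMD_EWSUM_FORWARD as built above.
    ccv_cnnp_model_t* const sum = ccv_cnnp_sum("sum");
    ccv_cnnp_model_io_t const y = ccv_cnnp_model_apply(sum, MODEL_IO_LIST(x0, x1));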
       |
       | typedef struct {
       |   ccv_cnnp_model_t super;
       |   int axis;
       |   ccv_nnc_tensor_symbol_t output;
       | } ccv_cnnp_model_concat_t;
       |
       | static void _ccv_cnnp_concat_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
      4| {
      4|   const ccv_cnnp_model_concat_t* const self = (const ccv_cnnp_model_concat_t*)super;
      4|   PRINT(CCV_CLI_VERBOSE, "[cnnp_concat_build] 1. -\n");
      4|   assert(output_size == 1);
      4|   ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
      4|   int i, j;
      4|   if (output_params.dim[0] == 0)
      0|     for (i = 1; i < input_size; i++)
      0|     {
      0|       output_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
      0|       if (output_params.dim[0] != 0)
      0|         break;
      0|     }
      4|   const int nd = ccv_nnc_tensor_nd(output_params.dim);
      4|   const int axis = self->axis;
      4|   assert(axis < nd);
      4|   output_params.dim[axis] = 0;
      4|   int input_is_contiguous = 1;
     12|   for (i = 0; i < input_size; i++)
      8|   {
      8|     const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
      8|     const int input_nd = ccv_nnc_tensor_nd(input_params.dim);
      8|     if (input_nd == 0)
      0|     {
      0|       PRINT(CCV_CLI_VERBOSE, "[cnnp_concat_build] %d. input[%d]: -\n", i + 2, i);
      0|       input_is_contiguous = 0;
      0|       continue;
      0|     }
      8|     if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
      0|     {
      0|       PRINT(CCV_CLI_VERBOSE, "[cnnp_concat_build] %d. input[%d]: (%d", i + 2, i, input_params.dim[0]);
      0|       int i;
      0|       for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && input_params.dim[i] > 0; i++)
      0|         PRINT(CCV_CLI_VERBOSE, ", %d", input_params.dim[i]);
      0|       PRINT(CCV_CLI_VERBOSE, ")\n");
      0|     }
      8|     assert(input_nd == nd);
     16|     for (j = 0; j < nd; j++)
      8|       if (j != axis)
      0|         { assert(input_params.dim[j] == output_params.dim[j]); }
      8|     output_params.dim[axis] += input_params.dim[axis];
      8|   }
      4|   outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
      4|   int ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
      4|   int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
      4|   ccv_nnc_tensor_get_stride(output_params.dim, stride);
      4|   if (input_is_contiguous)
      4|   {
      4|     ccv_nnc_tensor_symbol_t aliases[input_size];
     12|     for (i = 0; i < input_size; i++)
      8|     {
      8|       const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
      8|       aliases[i] = ccv_nnc_tensor_symbol_alias_new(graph, outputs[0], ofs, stride, input_params, 0);
      8|       ofs[axis] += input_params.dim[axis];
      8|     }
       |     // Format transform is more flexible.
      4|     ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, input_size, aliases, input_size, "concat");
      4|   } else {
      0|     ccv_nnc_tensor_symbol_t aliases[input_size];
      0|     for (i = 0; i < input_size; i++)
      0|     {
      0|       const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
      0|       if (input_params.dim[0] == 0)
      0|       {
       |         // Create a new alias anyway, but not going to use it, in this way, the alias count will match during absorb.
      0|         aliases[i] = ccv_nnc_tensor_symbol_alias_new(graph, outputs[0], ofs, stride, input_params, 0);
      0|         continue;
      0|       }
      0|       aliases[i] = ccv_nnc_tensor_symbol_alias_new(graph, outputs[0], ofs, stride, input_params, 0);
      0|       ofs[axis] += input_params.dim[axis];
      0|     }
       |     // Format transform is more flexible.
      0|     ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, input_size, aliases, input_size, "concat");
      0|   }
      4| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_concat_copy(const ccv_cnnp_model_t* const self, void* const context);
       |
       | static const ccv_cnnp_model_vtab_t ccv_cnnp_concat_isa = {
       |   .build = _ccv_cnnp_concat_build,
       |   .copy = _ccv_cnnp_concat_copy,
       | };
       |
       | ccv_cnnp_model_t* ccv_cnnp_concat(const int axis, const char* const name)
      4| {
      4|   ccv_cnnp_model_concat_t* const model_concat = (ccv_cnnp_model_concat_t*)cccalloc(1, sizeof(ccv_cnnp_model_concat_t));
      4|   model_concat->super.isa = &ccv_cnnp_concat_isa;
      4|   model_concat->super.input_size = 0;
      4|   model_concat->super.outputs = &model_concat->output;
      4|   model_concat->super.output_size = 1;
      4|   model_concat->axis = axis;
      4|   ccv_cnnp_model_copy_name(&model_concat->super, name);
      4|   return (ccv_cnnp_model_t*)model_concat;
      4| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_concat_copy(const ccv_cnnp_model_t* const super, void* const context)
      0| {
      0|   const ccv_cnnp_model_concat_t* const self = (const ccv_cnnp_model_concat_t*)super;
      0|   return ccv_cnnp_concat(self->axis, self->super.name);
      0| }
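A one-line sketch of the constructor above; the axis choice is illustrative (axis 1 is the channel axis for NCHW inputs):

    // All inputs must agree on every dimension except axis 1;
    // the output's axis-1 extent is the sum of the inputs'.
    ccv_cnnp_model_t* const cat = ccv_cnnp_concat(1, "concat");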
       |
       | typedef struct {
       |   ccv_cnnp_model_t super;
       |   int axis;
       |   ccv_nnc_tensor_symbol_t outputs[1];
       | } ccv_cnnp_model_chunk_t;
       |
       | static void _ccv_cnnp_chunk_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
      2| {
      2|   const ccv_cnnp_model_concat_t* const self = (const ccv_cnnp_model_concat_t*)super;
      2|   PRINT(CCV_CLI_VERBOSE, "[cnnp_chunk_build] 1. axis: %d\n", self->axis);
      2|   assert(input_size == 1);
      2|   const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
      2|   if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
      0|   {
      0|     PRINT(CCV_CLI_VERBOSE, "[cnnp_chunk_build] 2. input: (%d", input_params.dim[0]);
      0|     int i;
      0|     for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && input_params.dim[i] > 0; i++)
      0|       PRINT(CCV_CLI_VERBOSE, ", %d", input_params.dim[i]);
      0|     PRINT(CCV_CLI_VERBOSE, ")\n");
      0|   }
      2|   ccv_nnc_tensor_param_t output_params = input_params;
      2|   int i;
      2|   const int nd = ccv_nnc_tensor_nd(output_params.dim);
      2|   const int axis = self->axis;
      2|   assert(axis < nd);
      2|   const int n = self->super.output_size;
      2|   assert(n == output_size);
      2|   assert(output_params.dim[axis] % n == 0);
      2|   output_params.dim[axis] = output_params.dim[axis] / n;
      2|   int ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
      2|   int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
      2|   ccv_nnc_tensor_get_stride(input_params.dim, stride);
      2|   ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
      2|   if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If we are not reshape an alias, it is straightforward.
      2|   {
      6|     for (i = 0; i < output_size; i++)
      4|     {
      4|       outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, stride, output_params, 0);
      4|       ofs[axis] += output_params.dim[axis];
      4|     }
      2|   } else {
       |     // Otherwise, we need to check if it is permute. For permute, we cannot do alias directly.
       |     // We need to first materialize the permute and then run reshape on top of it, otherwise it will be wrong.
      0|     int old_stride[CCV_NNC_MAX_DIM_ALLOC];
      0|     ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, old_stride);
       |     // We identify permute by checking if the stride is not in descending order.
       |     // This also covered "permute" through reshape, rather than using ccv_cnnp_permute directly.
      0|     int i, no_permute = 1;
      0|     for (i = 1; no_permute && i < nd; i++)
      0|       if (old_stride[i - 1] < old_stride[i])
      0|         no_permute = 0;
      0|     if (no_permute)
      0|     { // Just straightforward reshape if there is no no permute.
      0|       for (i = 0; i < output_size; i++)
      0|       {
      0|         outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, old_stride, output_params, 0);
      0|         ofs[axis] += output_params.dim[axis];
      0|       }
      0|     } else {
       |       // Otherwise, we first do format transform to plain tensor and then do reshape.
      0|       ccv_nnc_tensor_symbol_t permuted = ccv_nnc_tensor_symbol_new(graph, input_params, 0);
      0|       ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(permuted), "reshape");
      0|       for (i = 0; i < output_size; i++)
      0|       {
      0|         outputs[i] = ccv_nnc_tensor_symbol_alias_new(graph, permuted, ofs, stride, output_params, 0);
      0|         ofs[axis] += output_params.dim[axis];
      0|       }
      0|     }
      0|   }
      2| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_chunk_copy(const ccv_cnnp_model_t* const self, void* const context);
       |
       | static const ccv_cnnp_model_vtab_t ccv_cnnp_chunk_isa = {
       |   .build = _ccv_cnnp_chunk_build,
       |   .copy = _ccv_cnnp_chunk_copy,
       | };
       |
       | ccv_cnnp_model_t* ccv_cnnp_chunk(const int n, const int axis, const char* const name)
      2| {
      2|   assert(n >= 1);
      2|   ccv_cnnp_model_chunk_t* const model_chunk = (ccv_cnnp_model_chunk_t*)cccalloc(1, sizeof(ccv_cnnp_model_chunk_t) + sizeof(ccv_nnc_tensor_symbol_t) * (n - 1));
      2|   model_chunk->super.isa = &ccv_cnnp_chunk_isa;
      2|   model_chunk->super.input_size = 1;
      2|   model_chunk->super.outputs = model_chunk->outputs;
      2|   model_chunk->super.output_size = n;
      2|   model_chunk->axis = axis;
      2|   ccv_cnnp_model_copy_name(&model_chunk->super, name);
      2|   return (ccv_cnnp_model_t*)model_chunk;
      2| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_chunk_copy(const ccv_cnnp_model_t* const super, void* const context)
      0| {
      0|   const ccv_cnnp_model_chunk_t* const self = (const ccv_cnnp_model_chunk_t*)super;
      0|   return ccv_cnnp_chunk(self->super.output_size, self->axis, self->super.name);
      0| }
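The inverse sketch for chunk, again with an illustrative axis:

    // Split one input into 2 equal slices along axis 1; the model exposes
    // output_size == 2, and each output aliases a slice of the input
    // (unless the input is a permuted alias, per the build function above).
    ccv_cnnp_model_t* const halves = ccv_cnnp_chunk(2, 1, "chunk");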
       |
       | typedef struct {
       |   ccv_cnnp_model_t super;
       |   ccv_nnc_tensor_symbol_t output;
       |   int format;
       |   int dim[CCV_NNC_MAX_DIM_ALLOC];
       |   int ofs[CCV_NNC_MAX_DIM_ALLOC];
       |   int stride[CCV_NNC_MAX_DIM_ALLOC];
       | } ccv_cnnp_model_reshape_t;
       |
       | static void _ccv_cnnp_reshape_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
  1.06k| {
  1.06k|   assert(input_size == 1);
  1.06k|   assert(output_size == 1);
  1.06k|   ccv_cnnp_model_reshape_t* const self = (ccv_cnnp_model_reshape_t*)super;
  1.06k|   if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
      0|   {
      0|     PRINT(CCV_CLI_VERBOSE, "[cnnp_reshape_build] 1. dim: (%d", self->dim[0]);
      0|     int i;
      0|     for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && self->dim[i] > 0; i++)
      0|       PRINT(CCV_CLI_VERBOSE, ", %d", self->dim[i]);
      0|     const int count = i;
      0|     PRINT(CCV_CLI_VERBOSE, "), ofs: (%d", self->ofs[0]);
      0|     for (i = 1; i < count; i++)
      0|       PRINT(CCV_CLI_VERBOSE, ", %d", self->ofs[i]);
      0|     PRINT(CCV_CLI_VERBOSE, "), stride: (%d", self->stride[0]);
      0|     for (i = 1; i < count; i++)
      0|       PRINT(CCV_CLI_VERBOSE, ", %d", self->stride[i]);
      0|     PRINT(CCV_CLI_VERBOSE, ")\n");
      0|   }
  1.06k|   ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  1.06k|   if (CCV_CLI_OUTPUT_LEVEL_IS(CCV_CLI_VERBOSE))
      0|   {
      0|     PRINT(CCV_CLI_VERBOSE, "[cnnp_reshape_build] 2. input: (%d", params.dim[0]);
      0|     int i;
      0|     for (i = 1; i < CCV_NNC_MAX_DIM_ALLOC && params.dim[i] > 0; i++)
      0|       PRINT(CCV_CLI_VERBOSE, ", %d", params.dim[i]);
      0|     PRINT(CCV_CLI_VERBOSE, ")\n");
      0|   }
  1.06k|   assert(ccv_nnc_dimension_count(self->dim) <= ccv_nnc_tensor_count(params));
  1.06k|   ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
  1.06k|   int stride_from_dim[CCV_NNC_MAX_DIM_ALLOC];
  1.06k|   if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If we are not reshape an alias, it is straightforward.
  1.06k|   {
  1.06k|     memcpy(params.dim, self->dim, sizeof(params.dim));
  1.06k|     int* stride;
  1.06k|     if (self->stride[0] == 0)
  1.06k|     {
  1.06k|       ccv_nnc_tensor_get_stride(self->dim, stride_from_dim);
  1.06k|       stride = stride_from_dim;
  1.06k|     } else
      5|       stride = self->stride;
  1.06k|     if (self->format > 0)
      5|       params.format = self->format;
  1.06k|     outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], self->ofs, stride, params, 0);
  1.06k|   } else {
       |     // Otherwise, we need to check if it is permute. For permute, we cannot do alias directly.
       |     // We need to first materialize the permute and then run reshape on top of it, otherwise it will be wrong.
      1|     int old_stride[CCV_NNC_MAX_DIM_ALLOC];
      1|     ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, old_stride);
       |     // We identify permute by checking if the stride is not in descending order.
       |     // This also covered "permute" through reshape, rather than using ccv_cnnp_permute directly.
      1|     const int nd = ccv_nnc_tensor_nd(params.dim);
      1|     const int new_nd = ccv_nnc_tensor_nd(self->dim);
      1|     int i, no_permute = 1;
       |     // If the new dim has different nd, or we actually have a stride, we need to check if it is no permute or not.
      1|     if (new_nd != nd || (self->stride[0] != 0 && memcmp(self->stride, old_stride, sizeof(self->stride))))
      2|       for (i = 1; no_permute && i < nd; i++)
      1|         if (old_stride[i - 1] < old_stride[i])
      1|           no_permute = 0;
      1|     if (no_permute)
      0|     { // Just straightforward reshape if there is no no permute.
      0|       memcpy(params.dim, self->dim, sizeof(params.dim));
      0|       int* stride;
      0|       if (self->stride[0] == 0)
      0|       {
      0|         if (new_nd != nd) // Cannot use old stride.
      0|         {
      0|           ccv_nnc_tensor_get_stride(self->dim, stride_from_dim);
      0|           stride = stride_from_dim;
      0|         } else
      0|           stride = old_stride;
      0|       } else
      0|         stride = self->stride;
      0|       if (self->format > 0)
      0|         params.format = self->format;
      0|       outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], self->ofs, stride, params, 0);
      1|     } else {
       |       // Otherwise, we first do format transform to plain tensor and then do reshape.
      1|       ccv_nnc_tensor_symbol_t permuted = ccv_nnc_tensor_symbol_new(graph, params, 0);
      1|       ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(permuted), "reshape");
      1|       memcpy(params.dim, self->dim, sizeof(params.dim));
      1|       int* stride;
      1|       if (self->stride[0] == 0)
      1|       {
      1|         ccv_nnc_tensor_get_stride(self->dim, stride_from_dim);
      1|         stride = stride_from_dim;
      1|       } else
      0|         stride = self->stride;
      1|       if (self->format > 0)
      0|         params.format = self->format;
       |       // And then we create alias against the permuted one.
      1|       outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, permuted, self->ofs, stride, params, 0);
      1|     }
      1|   }
  1.06k| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_reshape_copy(const ccv_cnnp_model_t* const super, void* const context);
       |
       | static const ccv_cnnp_model_vtab_t ccv_cnnp_reshape_isa = {
       |   .build = _ccv_cnnp_reshape_build,
       |   .copy = _ccv_cnnp_reshape_copy,
       | };
       |
       | ccv_cnnp_model_t* ccv_cnnp_reshape(const int format, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC], const char* const name)
  1.06k| {
  1.06k|   ccv_cnnp_model_reshape_t* const model_reshape = (ccv_cnnp_model_reshape_t*)cccalloc(1, sizeof(ccv_cnnp_model_reshape_t));
  1.06k|   model_reshape->super.isa = &ccv_cnnp_reshape_isa;
  1.06k|   model_reshape->super.input_size = 1;
  1.06k|   model_reshape->super.outputs = &model_reshape->output;
  1.06k|   model_reshape->super.output_size = 1;
  1.06k|   ccv_cnnp_model_copy_name(&model_reshape->super, name);
  1.06k|   model_reshape->format = format;
  1.06k|   memcpy(model_reshape->dim, dim, sizeof(model_reshape->dim));
  1.06k|   memcpy(model_reshape->ofs, ofs, sizeof(model_reshape->ofs));
  1.06k|   if (stride[0] != 0)
      5|     memcpy(model_reshape->stride, stride, sizeof(model_reshape->stride));
  1.06k|   return (ccv_cnnp_model_t*)model_reshape;
  1.06k| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_reshape_copy(const ccv_cnnp_model_t* const super, void* const context)
  1.00k| {
  1.00k|   const ccv_cnnp_model_reshape_t* const self = (const ccv_cnnp_model_reshape_t*)super;
  1.00k|   return ccv_cnnp_reshape(self->format, self->dim, self->ofs, self->stride, self->super.name);
  1.00k| }
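A sketch of the reshape constructor; `DIM_ALLOC` is the ccv_nnc helper (also used in the listing above) for building an int[CCV_NNC_MAX_DIM_ALLOC] literal, and the target shape is illustrative:

    // Reshape to 2x2. format == 0 keeps the input's format; empty ofs and
    // stride mean "start at the origin, use the natural packed stride".
    ccv_cnnp_model_t* const r = ccv_cnnp_reshape(0, DIM_ALLOC(2, 2), DIM_ALLOC(), DIM_ALLOC(), "reshape");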
       |
       | typedef struct {
       |   ccv_cnnp_model_t super;
       |   ccv_nnc_tensor_symbol_t output;
       |   int type;
       |   int begin[CCV_NNC_MAX_DIM_ALLOC];
       |   int end[CCV_NNC_MAX_DIM_ALLOC];
       | } ccv_cnnp_model_pad_t;
       |
       | static void _ccv_cnnp_pad_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
      1| {
      1|   assert(input_size == 1);
      1|   assert(output_size == 1);
      1|   ccv_cnnp_model_pad_t* const self = (ccv_cnnp_model_pad_t*)super;
      1|   PRINT(CCV_CLI_VERBOSE, "[cnnp_pad_build] -\n");
      1|   const ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
      1|   const int nd = ccv_nnc_tensor_nd(input_params.dim);
      1|   ccv_nnc_tensor_param_t params = input_params;
      1|   int i;
      5|   for (i = 0 ; i < nd; i++)
      4|     params.dim[i] += self->begin[i] + self->end[i];
      1|   const ccv_nnc_tensor_symbol_t padded = ccv_nnc_tensor_symbol_new(graph, params, 0);
      1|   ccv_nnc_cmd_t pad = CMD_PAD_FORWARD(self->type, (), ());
      1|   memcpy(pad.info.size.dim, self->begin, sizeof(pad.info.size.dim));
      1|   memcpy(pad.info.pad.end, self->end, sizeof(pad.info.pad.end));
      1|   ccv_nnc_graph_exec_symbol_new(graph, pad, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(padded), "pad");
      1|   outputs[0] = padded;
      1| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_pad_copy(const ccv_cnnp_model_t* const super, void* const context);
       |
       | static const ccv_cnnp_model_vtab_t ccv_cnnp_pad_isa = {
       |   .build = _ccv_cnnp_pad_build,
       |   .copy = _ccv_cnnp_pad_copy,
       | };
       |
       | ccv_cnnp_model_t* ccv_cnnp_pad(const int type, const int begin[CCV_NNC_MAX_DIM_ALLOC], const int end[CCV_NNC_MAX_DIM_ALLOC], const char* const name)
      1| {
      1|   ccv_cnnp_model_pad_t* const model_pad = (ccv_cnnp_model_pad_t*)cccalloc(1, sizeof(ccv_cnnp_model_pad_t));
      1|   model_pad->super.isa = &ccv_cnnp_pad_isa;
      1|   model_pad->super.input_size = 1;
      1|   model_pad->super.outputs = &model_pad->output;
      1|   model_pad->super.output_size = 1;
      1|   ccv_cnnp_model_copy_name(&model_pad->super, name);
      1|   model_pad->type = type;
      1|   memcpy(model_pad->begin, begin, sizeof(model_pad->begin));
      1|   memcpy(model_pad->end, end, sizeof(model_pad->end));
      1|   return (ccv_cnnp_model_t*)model_pad;
      1| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_pad_copy(const ccv_cnnp_model_t* const super, void* const context)
      0| {
      0|   const ccv_cnnp_model_pad_t* const self = (const ccv_cnnp_model_pad_t*)super;
      0|   return ccv_cnnp_pad(self->type, self->begin, self->end, self->super.name);
      0| }
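A sketch of the pad constructor; the `CCV_NNC_PAD_ZERO` type is an assumption based on ccv_nnc's pad command, and the dims are illustrative for an NCHW input:

    // Add one zero pixel on each spatial border: begin/end give the
    // per-dimension leading/trailing padding copied into CMD_PAD_FORWARD.
    ccv_cnnp_model_t* const p = ccv_cnnp_pad(CCV_NNC_PAD_ZERO,
      DIM_ALLOC(0, 0, 1, 1), DIM_ALLOC(0, 0, 1, 1), "pad");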
       |
       | typedef struct {
       |   ccv_cnnp_model_t super;
       |   ccv_nnc_tensor_symbol_t output;
       | } ccv_cnnp_model_identity_t;
       |
       | static void _ccv_cnnp_identity_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
      0| {
      0|   assert(input_size == 1);
      0|   assert(output_size == 1);
      0|   PRINT(CCV_CLI_VERBOSE, "[cnnp_identity_build] -\n");
      0|   outputs[0] = inputs[0];
      0| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_identity_copy(const ccv_cnnp_model_t* const super, void* const context);
       |
       | static const ccv_cnnp_model_vtab_t ccv_cnnp_identity_isa = {
       |   .build = _ccv_cnnp_identity_build,
       |   .copy = _ccv_cnnp_identity_copy,
       | };
       |
       | ccv_cnnp_model_t* ccv_cnnp_identity(const char* const name)
      0| {
      0|   ccv_cnnp_model_identity_t* const model_identity = (ccv_cnnp_model_identity_t*)cccalloc(1, sizeof(ccv_cnnp_model_identity_t));
      0|   model_identity->super.isa = &ccv_cnnp_identity_isa;
      0|   model_identity->super.input_size = 1;
      0|   model_identity->super.outputs = &model_identity->output;
      0|   model_identity->super.output_size = 1;
      0|   ccv_cnnp_model_copy_name(&model_identity->super, name);
      0|   return (ccv_cnnp_model_t*)model_identity;
      0| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_identity_copy(const ccv_cnnp_model_t* const super, void* const context)
      0| {
      0|   const ccv_cnnp_model_identity_t* const self = (const ccv_cnnp_model_identity_t*)super;
      0|   return ccv_cnnp_identity(self->super.name);
      0| }
       |
       | typedef struct {
       |   ccv_cnnp_model_t super;
       |   ccv_nnc_tensor_symbol_t output;
       |   int index[CCV_NNC_MAX_DIM_ALLOC];
       | } ccv_cnnp_model_permute_t;
       |
       | static void _ccv_cnnp_permute_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
      1| {
      1|   assert(input_size == 1);
      1|   assert(output_size == 1);
      1|   ccv_cnnp_model_permute_t* const self = (ccv_cnnp_model_permute_t*)super;
      1|   PRINT(CCV_CLI_VERBOSE, "[cnnp_permute_build] -\n");
      1|   ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
      1|   ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
      1|   const int nd = ccv_nnc_tensor_nd(params.dim);
      1|   int input_dim[CCV_NNC_MAX_DIM_ALLOC];
      1|   memcpy(input_dim, params.dim, sizeof(params.dim));
      1|   int input_stride[CCV_NNC_MAX_DIM_ALLOC] = {};
      1|   int output_stride[CCV_NNC_MAX_DIM_ALLOC] = {};
      1|   if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If it is not an alias. Find stride and permute.
      0|   {
      0|     ccv_nnc_tensor_get_stride(input_dim, input_stride);
      0|     int i;
      0|     for (i = 0; i < nd; i++)
      0|     {
      0|       const int idx = self->index[i];
      0|       assert(idx >= 0 && idx < nd);
      0|       params.dim[i] = input_dim[idx];
      0|       output_stride[i] = input_stride[idx];
      0|     }
      0|     outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ccv_nnc_no_ofs, output_stride, params, 0);
      1|   } else {
       |     // if it is an alias, we can get the stride from it and use that.
      1|     int input_ofs[CCV_NNC_MAX_DIM_ALLOC];
      1|     ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], input_ofs, input_stride);
      1|     assert(input_stride[0] != 0);
      1|     int output_ofs[CCV_NNC_MAX_DIM_ALLOC] = {};
      1|     int i;
      4|     for (i = 0; i < nd; i++)
      3|     {
      3|       const int idx = self->index[i];
      3|       assert(idx >= 0 && idx < nd);
      3|       params.dim[i] = input_dim[idx];
      3|       output_stride[i] = input_stride[idx];
      3|       output_ofs[i] = input_ofs[idx];
      3|     }
      1|     outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], output_ofs, output_stride, params, 0);
      1|   }
      1| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_permute_copy(const ccv_cnnp_model_t* const super, void* const context);
       |
       | static const ccv_cnnp_model_vtab_t ccv_cnnp_permute_isa = {
       |   .build = _ccv_cnnp_permute_build,
       |   .copy = _ccv_cnnp_permute_copy,
       | };
       |
       | ccv_cnnp_model_t* ccv_cnnp_permute(const int index[CCV_NNC_MAX_DIM_ALLOC], const char* const name)
      1| {
      1|   ccv_cnnp_model_permute_t* const model_permute = (ccv_cnnp_model_permute_t*)cccalloc(1, sizeof(ccv_cnnp_model_permute_t));
      1|   model_permute->super.isa = &ccv_cnnp_permute_isa;
      1|   model_permute->super.input_size = 1;
      1|   model_permute->super.outputs = &model_permute->output;
      1|   model_permute->super.output_size = 1;
      1|   ccv_cnnp_model_copy_name(&model_permute->super, name);
      1|   memcpy(model_permute->index, index, sizeof(model_permute->index));
      1|   return (ccv_cnnp_model_t*)model_permute;
      1| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_permute_copy(const ccv_cnnp_model_t* const super, void* const context)
      0| {
      0|   const ccv_cnnp_model_permute_t* const self = (const ccv_cnnp_model_permute_t*)super;
      0|   return ccv_cnnp_permute(self->index, self->super.name);
      0| }
       |
       | typedef struct {
       |   ccv_cnnp_model_t super;
       |   int index;
       |   ccv_nnc_tensor_symbol_t output;
       | } ccv_cnnp_model_extract_t;
       |
       | static void _ccv_cnnp_extract_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
      6| {
      6|   assert(output_size == 1);
      6|   ccv_cnnp_model_extract_t* const self = (ccv_cnnp_model_extract_t*)super;
      6|   PRINT(CCV_CLI_VERBOSE, "[cnnp_extract_build] index: %d\n", self->index);
      6|   outputs[0] = inputs[self->index];
      6| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_extract_copy(const ccv_cnnp_model_t* const self, void* const context);
       |
       | static const ccv_cnnp_model_vtab_t ccv_cnnp_extract_isa = {
       |   .build = _ccv_cnnp_extract_build,
       |   .copy = _ccv_cnnp_extract_copy,
       | };
       |
       | ccv_cnnp_model_t* ccv_cnnp_extract(const int index, const char* const name)
      6| {
      6|   ccv_cnnp_model_extract_t* const model_extract = (ccv_cnnp_model_extract_t*)cccalloc(1, sizeof(ccv_cnnp_model_extract_t));
      6|   model_extract->index = index;
      6|   model_extract->super.isa = &ccv_cnnp_extract_isa;
      6|   model_extract->super.input_size = 0;
      6|   model_extract->super.outputs = &model_extract->output;
      6|   model_extract->super.output_size = 1;
      6|   ccv_cnnp_model_copy_name(&model_extract->super, name);
      6|   return (ccv_cnnp_model_t*)model_extract;
      6| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_extract_copy(const ccv_cnnp_model_t* const super, void* const context)
      0| {
      0|   ccv_cnnp_model_extract_t* const self = (ccv_cnnp_model_extract_t*)super;
      0|   return ccv_cnnp_extract(self->index, self->super.name);
      0| }
       |
       | typedef struct {
       |   ccv_cnnp_model_t super;
       |   ccv_nnc_tensor_symbol_t output;
       | } ccv_cnnp_model_flatten_t;
       |
       | static void _ccv_cnnp_flatten_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
     10| {
     10|   PRINT(CCV_CLI_VERBOSE, "[cnnp_flatten_build] -\n");
     10|   assert(input_size == 1);
     10|   assert(output_size == 1);
     10|   const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
     10|   ccv_nnc_tensor_param_t output_params = params;
     10|   memset(output_params.dim, 0, sizeof(output_params.dim));
     10|   output_params.dim[0] = ccv_nnc_tensor_get_n(params);
     10|   assert(output_params.dim[0] > 0);
     10|   output_params.dim[1] = ccv_nnc_tensor_count(params) / output_params.dim[0];
     10|   int stride[CCV_NNC_MAX_DIM_ALLOC] = {};
     10|   ccv_nnc_tensor_get_stride(output_params.dim, stride);
     10|   outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], DIM_ALLOC(), stride, output_params, 0);
     10| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_flatten_copy(const ccv_cnnp_model_t* const self, void* const context);
       |
       | static const ccv_cnnp_model_vtab_t ccv_cnnp_flatten_isa = {
       |   .build = _ccv_cnnp_flatten_build,
       |   .copy = _ccv_cnnp_flatten_copy,
       | };
       |
       | ccv_cnnp_model_t* ccv_cnnp_flatten(const char* const name)
     12| {
     12|   ccv_cnnp_model_flatten_t* const model_flatten = (ccv_cnnp_model_flatten_t*)cccalloc(1, sizeof(ccv_cnnp_model_flatten_t));
     12|   model_flatten->super.isa = &ccv_cnnp_flatten_isa;
     12|   model_flatten->super.input_size = 1;
     12|   model_flatten->super.outputs = &model_flatten->output;
     12|   model_flatten->super.output_size = 1;
     12|   ccv_cnnp_model_copy_name(&model_flatten->super, name);
     12|   return (ccv_cnnp_model_t*)model_flatten;
     12| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_flatten_copy(const ccv_cnnp_model_t* const self, void* const context)
      2| {
      2|   return ccv_cnnp_flatten(self->name);
      2| }
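A sketch of flatten, which per the build above keeps the batch dimension and collapses the rest into one alias dimension:

    // An (N, C, H, W) input becomes an (N, C*H*W) view; no data is copied.
    ccv_cnnp_model_t* const f = ccv_cnnp_flatten("flatten");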
       |
       | // MARK - Batch Norm Layer
       |
       | typedef struct {
       |   ccv_cnnp_model_t super;
       |   ccv_nnc_tensor_symbol_t output;
       |   ccv_nnc_tensor_symbol_t bias;
       |   ccv_nnc_tensor_symbol_t scale;
       |   ccv_nnc_graph_exec_symbol_t batch_norm;
       |   ccv_nnc_cmd_param_t params;
       |   ccv_array_t* zero_inits;
       |   ccv_array_t* retainables;
       | } ccv_cnnp_model_batch_norm_t;
       |
       | static void _ccv_cnnp_batch_norm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
     75| {
     75|   assert(input_size == 1);
     75|   assert(output_size == 1);
     75|   ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
     75|   PRINT(CCV_CLI_VERBOSE, "[cnnp_batch_norm_build] -\n");
     75|   const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
     75|   const int nd = ccv_nnc_tensor_nd(params.dim);
     75|   ccv_nnc_tensor_param_t bias_params = params;
     75|   memset(bias_params.dim, 0, sizeof(bias_params.dim));
       |   // If the accuracy is not enough, bump it to 32-bit floating point.
     75|   if (bias_params.datatype != CCV_32F && bias_params.datatype != CCV_64F)
     16|     bias_params.datatype = CCV_32F;
     75|   bias_params.dim[0] = nd > 1 ? ccv_nnc_tensor_get_c(params) : params.dim[0];
     75|   const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, params, 0);
       |   // Both scale and bias are shared between if this model is reused.
     75|   if (!self->scale.graph)
     75|     self->scale = ccv_nnc_tensor_symbol_new(graph, bias_params, "scale");
     75|   if (!self->bias.graph)
     75|     self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
     75|   const ccv_nnc_tensor_symbol_t mean = ccv_nnc_tensor_symbol_new(graph, bias_params, "mean");
     75|   const ccv_nnc_tensor_symbol_t var = ccv_nnc_tensor_symbol_new(graph, bias_params, "var");
       |   // Otherwise, notice mean, var, saved_mean, saved_inv_std are not reused.
     75|   if (!self->zero_inits)
     75|     self->zero_inits = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
     75|   ccv_array_push(self->zero_inits, &mean);
     75|   ccv_array_push(self->zero_inits, &var);
     75|   const ccv_nnc_tensor_symbol_t out_mean = ccv_nnc_tensor_symbol_new(graph, bias_params, "out_mean");
     75|   const ccv_nnc_tensor_symbol_t out_var = ccv_nnc_tensor_symbol_new(graph, bias_params, "out_var");
     75|   if (!self->retainables)
     75|     self->retainables = ccv_array_new(sizeof(ccv_nnc_tensor_symbol_t), 0, 0);
     75|   ccv_array_push(self->retainables, &out_mean);
     75|   ccv_array_push(self->retainables, &out_var);
     75|   const ccv_nnc_tensor_symbol_t saved_mean = ccv_nnc_tensor_symbol_new(graph, bias_params, "saved_mean");
     75|   const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, bias_params, "saved_inv_std");
     75|   const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim), CCV_NNC_MAX_DIM);
     75|   ccv_nnc_cmd_param_t batch_norm = self->params;
     75|   batch_norm.bnorm.count = hw >= 0 ? CCV_NNC_MAX_DIM + 1 : 1;
     75|   int i;
     75|   batch_norm.bnorm.axis[0] = (params.format == CCV_TENSOR_FORMAT_CHWN) ? 3 : 0;
     75|   if (hw >= 0)
    225|     for (i = 0; i < CCV_NNC_MAX_DIM; i++)
    150|       batch_norm.bnorm.axis[i + 1] = i + hw;
     75|   self->params = batch_norm;
     75|   self->batch_norm = ccv_nnc_graph_exec_symbol_new(graph, ccv_nnc_cmd(CCV_NNC_BATCH_NORM_FORWARD, 0, batch_norm, 0), TENSOR_SYMBOL_LIST(inputs[0], self->scale, self->bias, mean, var), TENSOR_SYMBOL_LIST(output, out_mean, out_var, saved_mean, saved_inv_std), "batch_norm");
     75|   outputs[0] = output;
     75| }
       |
       | static void _ccv_cnnp_batch_norm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
     24| {
     24|   ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
     24|   if (self->scale.graph)
     24|     initializer(context, CMD_RANDOM_UNIFORM_FORWARD(0, 1), ccv_nnc_no_hint, 0, 0, self->scale);
     24|   if (self->bias.graph)
     24|     initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
     24|   int i;
     24|   if (self->zero_inits)
     72|     for (i = 0; i < self->zero_inits->rnum; i++)
     48|       initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, *(ccv_nnc_tensor_symbol_t*)ccv_array_get(self->zero_inits, i));
     24| }
       |
       | static void _ccv_cnnp_batch_norm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
     75| {
     75|   ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
     75|   if (self->scale.graph)
     75|     add_to_array(parameters, self->scale, is_trainable);
     75|   if (self->bias.graph)
     75|     add_to_array(parameters, self->bias, is_trainable);
     75| }
       |
       | static void _ccv_cnnp_batch_norm_add_to_output(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const outputs)
     75| {
     75|   ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
     75|   int i;
     75|   if (self->retainables)
    225|     for (i = 0; i < self->retainables->rnum; i++)
    150|     {
    150|       const ccv_nnc_tensor_symbol_t symbol = *(ccv_nnc_tensor_symbol_t*)ccv_array_get(self->retainables, i);
    150|       add_to_array(outputs, symbol, 0);
    150|     }
     75| }
       |
       | static void _ccv_cnnp_batch_norm_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
     32| {
     32|   ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
     32|   if (self->batch_norm.graph)
     32|   {
     32|     self->params.bnorm.is_test = is_test;
     32|     updater(context, self->batch_norm, ccv_nnc_cmd(CCV_NNC_BATCH_NORM_FORWARD, 0, self->params, 0), ccv_nnc_no_hint);
     32|   }
     32| }
       |
       | static void _ccv_cnnp_batch_norm_deinit(ccv_cnnp_model_t* const super)
     83| {
     83|   ccv_cnnp_model_batch_norm_t* const self = (ccv_cnnp_model_batch_norm_t*)super;
     83|   if (self->zero_inits)
     75|     ccv_array_free(self->zero_inits);
     83|   if (self->retainables)
     75|     ccv_array_free(self->retainables);
     83| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_batch_norm_copy(const ccv_cnnp_model_t* const super, void* const context);
       |
       | static const ccv_cnnp_model_vtab_t ccv_cnnp_batch_norm_isa = {
       |   .build = _ccv_cnnp_batch_norm_build,
       |   .init_states = _ccv_cnnp_batch_norm_init_states,
       |   .add_to_parameter = _ccv_cnnp_batch_norm_add_to_parameter,
       |   .add_to_output = _ccv_cnnp_batch_norm_add_to_output,
       |   .copy = _ccv_cnnp_batch_norm_copy,
       |   .set_is_test = _ccv_cnnp_batch_norm_set_is_test,
       |   .deinit = _ccv_cnnp_batch_norm_deinit,
       | };
       |
       | ccv_cnnp_model_t* ccv_cnnp_batch_norm(const float momentum, const float epsilon, const int is_trainable, const char* const name)
     83| {
     83|   ccv_cnnp_model_batch_norm_t* const model_batch_norm = (ccv_cnnp_model_batch_norm_t*)cccalloc(1, sizeof(ccv_cnnp_model_batch_norm_t));
     83|   model_batch_norm->super.isa = &ccv_cnnp_batch_norm_isa;
     83|   model_batch_norm->super.input_size = 1;
     83|   model_batch_norm->super.outputs = &model_batch_norm->output;
     83|   model_batch_norm->super.output_size = 1;
     83|   model_batch_norm->super.is_trainable = is_trainable;
     83|   ccv_cnnp_model_copy_name(&model_batch_norm->super, name);
     83|   model_batch_norm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
     83|   model_batch_norm->scale.graph = 0;
     83|   model_batch_norm->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
     83|   model_batch_norm->bias.graph = 0;
     83|   model_batch_norm->params.bnorm.momentum = momentum;
     83|   model_batch_norm->params.bnorm.epsilon = epsilon;
     83|   return (ccv_cnnp_model_t*)model_batch_norm;
     83| }
       |
       | static ccv_cnnp_model_t* _ccv_cnnp_batch_norm_copy(const ccv_cnnp_model_t* const super, void* const context)
      8| {
      8|   const ccv_cnnp_model_batch_norm_t* const self = (const ccv_cnnp_model_batch_norm_t*)super;
      8|   return ccv_cnnp_batch_norm(self->params.bnorm.momentum, self->params.bnorm.epsilon, self->super.is_trainable, self->super.name);
      8| }
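A sketch of the batch-norm constructor with illustrative hyperparameters:

    // momentum 0.9 for the running mean/var, epsilon 1e-4 for numerical
    // stability, is_trainable == 1 so scale and bias become parameters.
    ccv_cnnp_model_t* const bn = ccv_cnnp_batch_norm(0.9, 1e-4, 1, "bn");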
1084
1085
// MARK - Convolution Layer
1086
1087
typedef struct {
1088
  ccv_cnnp_model_t super;
1089
  ccv_nnc_tensor_symbol_t output;
1090
  ccv_nnc_tensor_symbol_t weights;
1091
  ccv_nnc_tensor_symbol_t bias;
1092
  int groups;
1093
  int filters;
1094
  int kdim[CCV_NNC_MAX_DIM_ALLOC];
1095
  int dilation[CCV_NNC_MAX_DIM_ALLOC];
1096
  int no_bias;
1097
  int format;
1098
  ccv_nnc_hint_t hint;
1099
} ccv_cnnp_model_convolution_t;
1100
1101
static void _ccv_cnnp_convolution_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1102
114
{
1103
114
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1104
114
  PRINT(CCV_CLI_VERBOSE, "[cnnp_convolution_build] -\n");
1105
114
  assert(input_size == 1);
1106
114
  assert(output_size == 1);
1107
114
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1108
114
  int i;
1109
114
  const int k_nd = ccv_nnc_tensor_nd(self->kdim);
1110
114
  const int nd = k_nd + 2;
1111
114
  ccv_nnc_tensor_param_t weights_params = params;
1112
114
  if (self->format)
1113
0
    weights_params.format = self->format;
1114
114
  ccv_nnc_tensor_set_n(&weights_params, self->filters);
1115
114
  const int a_nd = ccv_nnc_tensor_nd(params.dim);
1116
114
  int c;
1117
114
  switch (params.format)
1118
114
  {
1119
15
    case CCV_TENSOR_FORMAT_NHWC:
1120
15
      c = params.dim[a_nd - 1];
1121
15
      break;
1122
99
    case CCV_TENSOR_FORMAT_NCHW:
1123
99
      if (a_nd == k_nd + 1)
1124
0
        c = params.dim[0];
1125
99
      else
1126
99
        c = params.dim[a_nd <= 1 ? 
00
: 1];
1127
99
      break;
1128
0
    case CCV_TENSOR_FORMAT_CHWN:
1129
0
      c = params.dim[0];
1130
0
      break;
1131
114
  }
1132
114
  assert(c % self->groups == 0);
1133
114
  ccv_nnc_tensor_set_c(&weights_params, nd, c / self->groups);
1134
114
  int hw = -1;
1135
114
  if (weights_params.format == CCV_TENSOR_FORMAT_NHWC || weights_params.format == CCV_TENSOR_FORMAT_CHWN)
1136
15
    hw = 1;
1137
99
  else if (weights_params.format == CCV_TENSOR_FORMAT_NCHW)
1138
99
    hw = 2;
1139
114
  assert(hw >= 0);
1140
342
  for (i = 0; i < k_nd; i++)
1141
228
    weights_params.dim[i + hw] = self->kdim[i];
1142
114
  if (!self->weights.graph)
1143
110
    self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
1144
114
  assert(self->weights.graph == graph);
1145
114
  ccv_nnc_tensor_param_t bias_params = params;
1146
114
  if (self->format)
1147
0
    bias_params.format = self->format;
1148
114
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
1149
114
  bias_params.dim[0] = self->filters;
1150
114
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_FORWARD(self->groups, self->filters);
1151
342
  for (i = 0; i < k_nd; i++)
1152
228
    cmd.info.size.dim[i] = self->kdim[i];
1153
114
  cmd.info.size.dim[k_nd] = c;
1154
114
  memcpy(cmd.info.convolution.dilation, self->dilation, sizeof(self->dilation));
1155
114
  ccv_nnc_tensor_param_t output_params;
1156
  // Dilate weight size based on the dilation factor.
1157
342
  for (i = 0; i < k_nd; i++)
1158
228
    weights_params.dim[i + hw] = (self->kdim[i] - 1) * ccv_max(self->dilation[i], 1) + 1;
1159
114
  ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
1160
114
      params,
1161
114
      weights_params,
1162
114
      bias_params,
1163
114
    }, 3, self->hint, &output_params, 1);
1164
114
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1165
114
  ccv_nnc_graph_exec_symbol_t convolution;
1166
114
  if (self->no_bias)
1167
10
    convolution = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights), TENSOR_SYMBOL_LIST(output), "convolution");
1168
104
  else {
1169
104
    if (!self->bias.graph)
1170
100
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
1171
104
    convolution = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights, self->bias), TENSOR_SYMBOL_LIST(output), "convolution");
1172
104
  }
1173
114
  ccv_nnc_graph_exec_symbol_set_hint(graph, convolution, self->hint);
1174
114
  outputs[0] = output;
1175
114
}
1176
1177
static void _ccv_cnnp_convolution_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
1178
36
{
1179
36
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1180
36
  const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
1181
36
  const int n = ccv_max(ccv_nnc_tensor_get_n(weight_params), 1);
1182
36
  const int count = ccv_nnc_tensor_count(weight_params);
1183
36
  const float std = sqrtf(2) / sqrtf(count / n);
1184
36
  const float bound = sqrtf(3) * std;
1185
36
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->weights);
1186
36
  if (self->bias.graph)
1187
36
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
1188
36
}
1189
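// The initializer above is Kaiming (He) uniform: with fan-in = count / n
// (input channels times kernel volume), std = sqrt(2 / fan_in), and the
// weights are drawn from U(-sqrt(3) * std, sqrt(3) * std), a uniform
// distribution with exactly that standard deviation. A worked example,
// assuming a 3x3 convolution with 64 filters over 32 input channels:
//   count = 64 * 32 * 3 * 3 = 18432, n = 64, fan_in = 18432 / 64 = 288
//   std = sqrt(2 / 288) ~= 0.0833, bound = sqrt(3) * 0.0833 ~= 0.1443
// The dense initializer later in this file applies the same rule with
// fan-in taken directly from weight_params.dim[1].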
1190
static void _ccv_cnnp_convolution_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
1191
114
{
1192
114
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1193
114
  add_to_array(parameters, self->weights, is_trainable);
1194
114
  if (self->bias.graph)
1195
104
    add_to_array(parameters, self->bias, is_trainable);
1196
114
}
1197
1198
static ccv_cnnp_model_t* _ccv_cnnp_convolution_copy(const ccv_cnnp_model_t* const super, void* const context);
1199
1200
static const ccv_cnnp_model_vtab_t ccv_cnnp_convolution_isa = {
1201
  .build = _ccv_cnnp_convolution_build,
1202
  .init_states = _ccv_cnnp_convolution_init_states,
1203
  .add_to_parameter = _ccv_cnnp_convolution_add_to_parameter,
1204
  .copy = _ccv_cnnp_convolution_copy,
1205
};
1206
1207
ccv_cnnp_model_t* ccv_cnnp_convolution(const int groups, const int filters, const int kdim[CCV_NNC_MAX_DIM_ALLOC], const int dilation[CCV_NNC_MAX_DIM_ALLOC], const int no_bias, ccv_nnc_hint_t hint, const int format, const int is_trainable, const char* const name)
1208
126
{
1209
126
  ccv_cnnp_model_convolution_t* const model_convolution = (ccv_cnnp_model_convolution_t*)cccalloc(1, sizeof(ccv_cnnp_model_convolution_t));
1210
126
  model_convolution->super.isa = &ccv_cnnp_convolution_isa;
1211
126
  model_convolution->super.input_size = 1;
1212
126
  model_convolution->super.outputs = &model_convolution->output;
1213
126
  model_convolution->super.output_size = 1;
1214
126
  model_convolution->super.is_trainable = is_trainable;
1215
126
  ccv_cnnp_model_copy_name(&model_convolution->super, name);
1216
126
  model_convolution->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
1217
126
  model_convolution->weights.graph = 0;
1218
126
  model_convolution->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
1219
126
  model_convolution->bias.graph = 0;
1220
126
  model_convolution->groups = groups;
1221
126
  model_convolution->filters = filters;
1222
126
  memcpy(model_convolution->kdim, kdim, sizeof(model_convolution->kdim));
1223
126
  memcpy(model_convolution->dilation, dilation, sizeof(model_convolution->dilation));
1224
126
  model_convolution->no_bias = no_bias;
1225
126
  model_convolution->hint = hint;
1226
126
  model_convolution->format = format;
1227
126
  return (ccv_cnnp_model_t*)model_convolution;
1228
126
}
1229
1230
static ccv_cnnp_model_t* _ccv_cnnp_convolution_copy(const ccv_cnnp_model_t* const super, void* const context)
1231
16
{
1232
16
  ccv_cnnp_model_convolution_t* const self = (ccv_cnnp_model_convolution_t*)super;
1233
16
  return ccv_cnnp_convolution(self->groups, self->filters, self->kdim, self->dilation, self->no_bias, self->hint, self->format, self->super.is_trainable, self->super.name);
1234
16
}
1235
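// A hedged construction sketch; DIM_ALLOC() and HINT() are the helper macros
// from ccv_nnc_easy.h, and the filter count, kernel size and strides below are
// illustrative.
ccv_cnnp_model_t* const conv3x3 = ccv_cnnp_convolution(
  1 /* groups */, 64 /* filters */, DIM_ALLOC(3, 3) /* kdim */,
  DIM_ALLOC() /* no dilation */, 0 /* keep bias */,
  HINT((1, 1), (1, 1)) /* stride, border */, 0 /* inherit format */,
  1 /* trainable */, "conv3x3");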
1236
// MARK - Convolution Transpose Layer
1237
1238
typedef struct {
1239
  ccv_cnnp_model_t super;
1240
  ccv_nnc_tensor_symbol_t output;
1241
  ccv_nnc_tensor_symbol_t weights;
1242
  ccv_nnc_tensor_symbol_t bias;
1243
  int groups;
1244
  int filters;
1245
  int kdim[CCV_NNC_MAX_DIM_ALLOC];
1246
  int dilation[CCV_NNC_MAX_DIM_ALLOC];
1247
  int output_padding;
1248
  int no_bias;
1249
  int format;
1250
  ccv_nnc_hint_t hint;
1251
} ccv_cnnp_model_convolution_transpose_t;
1252
1253
static void _ccv_cnnp_convolution_transpose_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1254
0
{
1255
0
  ccv_cnnp_model_convolution_transpose_t* const self = (ccv_cnnp_model_convolution_transpose_t*)super;
1256
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_convolution_transpose_build] -\n");
1257
0
  assert(input_size == 1);
1258
0
  assert(output_size == 1);
1259
0
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1260
0
  int i;
1261
0
  const int nd = CCV_NNC_MAX_DIM + 2;
1262
0
  ccv_nnc_tensor_param_t weights_params = params;
1263
0
  if (self->format)
1264
0
    weights_params.format = self->format;
1265
0
  const int c = ccv_nnc_tensor_get_c(params);
1266
0
  ccv_nnc_tensor_set_n(&weights_params, c);
1267
0
  assert(c % self->groups == 0);
1268
0
  ccv_nnc_tensor_set_c(&weights_params, nd, self->filters / self->groups);
1269
0
  const int hw = ccv_nnc_tensor_hw(weights_params, nd, CCV_NNC_MAX_DIM);
1270
0
  assert(hw >= 0);
1271
0
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1272
0
    weights_params.dim[i + hw] = self->kdim[i];
1273
0
  if (!self->weights.graph)
1274
0
    self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
1275
0
  assert(self->weights.graph == graph);
1276
0
  ccv_nnc_tensor_param_t bias_params = params;
1277
0
  if (self->format)
1278
0
    bias_params.format = self->format;
1279
0
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
1280
0
  bias_params.dim[0] = self->filters;
1281
0
  ccv_nnc_cmd_t cmd = CMD_CONVOLUTION_TRANSPOSE_FORWARD(self->groups, self->filters, self->output_padding);
1282
0
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1283
0
    cmd.info.size.dim[i] = self->kdim[i];
1284
0
  cmd.info.size.dim[CCV_NNC_MAX_DIM] = c;
1285
0
  memcpy(cmd.info.convolution_transpose.dilation, self->dilation, sizeof(self->dilation));
1286
0
  ccv_nnc_tensor_param_t output_params;
1287
  // Dilate weight size based on the dilation factor.
1288
0
  for (i = 0; i < CCV_NNC_MAX_DIM; i++)
1289
0
    weights_params.dim[i + hw] = (self->kdim[i] - 1) * ccv_max(self->dilation[i], 1) + 1;
1290
0
  ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
1291
0
      params,
1292
0
      weights_params,
1293
0
      bias_params,
1294
0
    }, 3, self->hint, &output_params, 1);
1295
0
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1296
0
  ccv_nnc_graph_exec_symbol_t convolution_transpose;
1297
0
  if (self->no_bias)
1298
0
    convolution_transpose = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights), TENSOR_SYMBOL_LIST(output), "convolution_transpose");
1299
0
  else {
1300
0
    if (!self->bias.graph)
1301
0
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
1302
0
    convolution_transpose = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights, self->bias), TENSOR_SYMBOL_LIST(output), "convolution_transpose");
1303
0
  }
1304
0
  ccv_nnc_graph_exec_symbol_set_hint(graph, convolution_transpose, self->hint);
1305
0
  outputs[0] = output;
1306
0
}
1307
1308
static void _ccv_cnnp_convolution_transpose_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
1309
0
{
1310
0
  ccv_cnnp_model_convolution_transpose_t* const self = (ccv_cnnp_model_convolution_transpose_t*)super;
1311
0
  const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
1312
0
  const int n = ccv_max(ccv_nnc_tensor_get_n(weight_params), 1);
1313
0
  const int count = ccv_nnc_tensor_count(weight_params);
1314
0
  const float std = sqrtf(2) / sqrtf(count / n);
1315
0
  const float bound = sqrtf(3) * std;
1316
0
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->weights);
1317
0
  if (self->bias.graph)
1318
0
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
1319
0
}
1320
1321
static void _ccv_cnnp_convolution_transpose_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
1322
0
{
1323
0
  ccv_cnnp_model_convolution_transpose_t* const self = (ccv_cnnp_model_convolution_transpose_t*)super;
1324
0
  add_to_array(parameters, self->weights, is_trainable);
1325
0
  if (self->bias.graph)
1326
0
    add_to_array(parameters, self->bias, is_trainable);
1327
0
}
1328
1329
static ccv_cnnp_model_t* _ccv_cnnp_convolution_transpose_copy(const ccv_cnnp_model_t* const super, void* const context);
1330
1331
static const ccv_cnnp_model_vtab_t ccv_cnnp_convolution_transpose_isa = {
1332
  .build = _ccv_cnnp_convolution_transpose_build,
1333
  .init_states = _ccv_cnnp_convolution_transpose_init_states,
1334
  .add_to_parameter = _ccv_cnnp_convolution_transpose_add_to_parameter,
1335
  .copy = _ccv_cnnp_convolution_transpose_copy,
1336
};
1337
1338
ccv_cnnp_model_t* ccv_cnnp_convolution_transpose(const int groups, const int filters, const int kdim[CCV_NNC_MAX_DIM_ALLOC], const int dilation[CCV_NNC_MAX_DIM_ALLOC], const int output_padding, const int no_bias, ccv_nnc_hint_t hint, const int format, const int is_trainable, const char* const name)
1339
0
{
1340
0
  ccv_cnnp_model_convolution_transpose_t* const model_convolution_transpose = (ccv_cnnp_model_convolution_transpose_t*)cccalloc(1, sizeof(ccv_cnnp_model_convolution_transpose_t));
1341
0
  model_convolution_transpose->super.isa = &ccv_cnnp_convolution_transpose_isa;
1342
0
  model_convolution_transpose->super.input_size = 1;
1343
0
  model_convolution_transpose->super.outputs = &model_convolution_transpose->output;
1344
0
  model_convolution_transpose->super.output_size = 1;
1345
0
  model_convolution_transpose->super.is_trainable = is_trainable;
1346
0
  ccv_cnnp_model_copy_name(&model_convolution_transpose->super, name);
1347
0
  model_convolution_transpose->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
1348
0
  model_convolution_transpose->weights.graph = 0;
1349
0
  model_convolution_transpose->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
1350
0
  model_convolution_transpose->bias.graph = 0;
1351
0
  model_convolution_transpose->groups = groups;
1352
0
  model_convolution_transpose->filters = filters;
1353
0
  memcpy(model_convolution_transpose->kdim, kdim, sizeof(model_convolution_transpose->kdim));
1354
0
  memcpy(model_convolution_transpose->dilation, dilation, sizeof(model_convolution_transpose->dilation));
1355
0
  model_convolution_transpose->output_padding = output_padding;
1356
0
  model_convolution_transpose->no_bias = no_bias;
1357
0
  model_convolution_transpose->hint = hint;
1358
0
  model_convolution_transpose->format = format;
1359
0
  return (ccv_cnnp_model_t*)model_convolution_transpose;
1360
0
}
1361
1362
static ccv_cnnp_model_t* _ccv_cnnp_convolution_transpose_copy(const ccv_cnnp_model_t* const super, void* const context)
1363
0
{
1364
0
  ccv_cnnp_model_convolution_transpose_t* const self = (ccv_cnnp_model_convolution_transpose_t*)super;
1365
0
  return ccv_cnnp_convolution_transpose(self->groups, self->filters, self->kdim, self->dilation, self->output_padding, self->no_bias, self->hint, self->format, self->super.is_trainable, self->super.name);
1366
0
}
1367
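// Compared to ccv_cnnp_convolution, the transposed variant adds output_padding,
// which resolves the output-size ambiguity when the stride does not divide the
// target size evenly. A sketch with illustrative values:
ccv_cnnp_model_t* const deconv = ccv_cnnp_convolution_transpose(
  1, 32, DIM_ALLOC(4, 4), DIM_ALLOC(), 0 /* output_padding */,
  0 /* keep bias */, HINT((2, 2), (1, 1)), 0, 1, "deconv");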
1368
// MARK - Dense Layer
1369
1370
typedef struct {
1371
  ccv_cnnp_model_t super;
1372
  ccv_nnc_tensor_symbol_t output;
1373
  ccv_nnc_tensor_symbol_t weights;
1374
  ccv_nnc_tensor_symbol_t bias;
1375
  int count;
1376
  int no_bias;
1377
  int flags;
1378
} ccv_cnnp_model_dense_t;
1379
1380
static void _ccv_cnnp_dense_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1381
2.33k
{
1382
2.33k
  ccv_cnnp_model_dense_t* const self = (ccv_cnnp_model_dense_t*)super;
1383
2.33k
  PRINT(CCV_CLI_VERBOSE, "[cnnp_dense_build] -\n");
1384
2.33k
  assert(input_size == 1);
1385
2.33k
  assert(output_size == 1);
1386
2.33k
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1387
2.33k
  ccv_nnc_tensor_param_t weights_params = params;
1388
2.33k
  memset(weights_params.dim, 0, sizeof(weights_params.dim));
1389
2.33k
  weights_params.dim[0] = self->count;
1390
2.33k
  weights_params.dim[1] = params.dim[ccv_nnc_tensor_nd(params.dim) - 1];
1391
2.33k
  if (!self->weights.graph)
1392
2.31k
    self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
1393
2.33k
  assert(self->weights.graph == graph);
1394
2.33k
  ccv_nnc_tensor_param_t bias_params = params;
1395
2.33k
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
1396
2.33k
  bias_params.dim[0] = self->count;
1397
2.33k
  ccv_nnc_cmd_t cmd = {0};
1398
2.33k
  cmd.cmd = CCV_NNC_GEMM_FORWARD;
1399
2.33k
  cmd.info.blas.a[0] = 1;
1400
2.33k
  cmd.info.blas.a[1] = 1;
1401
2.33k
  cmd.info.blas.transpose_b[0] = 0;
1402
2.33k
  cmd.info.blas.transpose_b[1] = 1;
1403
2.33k
  cmd.info.blas.flags = self->flags;
1404
2.33k
  ccv_nnc_tensor_param_t output_params;
1405
2.33k
  ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
1406
2.33k
      params,
1407
2.33k
      weights_params,
1408
2.33k
      bias_params,
1409
2.33k
    }, 3, ccv_nnc_no_hint, &output_params, 1);
1410
2.33k
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1411
2.33k
  if (self->no_bias)
1412
2.08k
    ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights), TENSOR_SYMBOL_LIST(output), "dense");
1413
246
  else {
1414
246
    if (!self->bias.graph)
1415
243
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
1416
246
    ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], self->weights, self->bias), TENSOR_SYMBOL_LIST(output), "dense");
1417
246
  }
1418
2.33k
  outputs[0] = output;
1419
2.33k
}
1420
1421
static void _ccv_cnnp_dense_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
1422
79
{
1423
79
  ccv_cnnp_model_dense_t* const self = (ccv_cnnp_model_dense_t*)super;
1424
79
  const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
1425
79
  const int c = weight_params.dim[1];
1426
79
  const float std = sqrtf(2) / sqrtf(c);
1427
79
  const float bound = sqrtf(3) * std;
1428
79
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->weights);
1429
79
  if (self->bias.graph)
1430
33
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
1431
79
}
1432
1433
static void _ccv_cnnp_dense_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
1434
2.33k
{
1435
2.33k
  ccv_cnnp_model_dense_t* const self = (ccv_cnnp_model_dense_t*)super;
1436
2.33k
  add_to_array(parameters, self->weights, is_trainable);
1437
2.33k
  if (self->bias.graph)
1438
246
    add_to_array(parameters, self->bias, is_trainable);
1439
2.33k
}
1440
1441
static ccv_cnnp_model_t* _ccv_cnnp_dense_copy(const ccv_cnnp_model_t* const super, void* const context);
1442
1443
static const ccv_cnnp_model_vtab_t ccv_cnnp_dense_isa = {
1444
  .build = _ccv_cnnp_dense_build,
1445
  .init_states = _ccv_cnnp_dense_init_states,
1446
  .add_to_parameter = _ccv_cnnp_dense_add_to_parameter,
1447
  .copy = _ccv_cnnp_dense_copy,
1448
};
1449
1450
ccv_cnnp_model_t* ccv_cnnp_dense(const int count, const int no_bias, const int flags, const int is_trainable, const char* const name)
1451
2.31k
{
1452
2.31k
  ccv_cnnp_model_dense_t* const model_dense = (ccv_cnnp_model_dense_t*)cccalloc(1, sizeof(ccv_cnnp_model_dense_t));
1453
2.31k
  model_dense->super.isa = &ccv_cnnp_dense_isa;
1454
2.31k
  model_dense->super.input_size = 1;
1455
2.31k
  model_dense->super.outputs = &model_dense->output;
1456
2.31k
  model_dense->super.output_size = 1;
1457
2.31k
  model_dense->super.is_trainable = is_trainable;
1458
2.31k
  ccv_cnnp_model_copy_name(&model_dense->super, name);
1459
2.31k
  model_dense->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
1460
2.31k
  model_dense->weights.graph = 0;
1461
2.31k
  model_dense->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
1462
2.31k
  model_dense->bias.graph = 0;
1463
2.31k
  model_dense->count = count;
1464
2.31k
  model_dense->no_bias = no_bias;
1465
2.31k
  model_dense->flags = flags;
1466
2.31k
  return (ccv_cnnp_model_t*)model_dense;
1467
2.31k
}
1468
1469
static ccv_cnnp_model_t* _ccv_cnnp_dense_copy(const ccv_cnnp_model_t* const super, void* const context)
1470
2.20k
{
1471
2.20k
  const ccv_cnnp_model_dense_t* const self = (const ccv_cnnp_model_dense_t*)super;
1472
2.20k
  return ccv_cnnp_dense(self->count, self->no_bias, self->flags, self->super.is_trainable, self->super.name);
1473
2.20k
}
1474
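// The build above sets transpose_b = {0, 1}, so GEMM computes y = x * W^T (+ bias)
// with W stored as [count, input_dim]. A minimal two-layer sketch, assuming the
// ccv_cnnp model-io API; the layer sizes and names are illustrative:
ccv_cnnp_model_io_t const x = ccv_cnnp_input();
ccv_cnnp_model_io_t const h = ccv_cnnp_model_apply(
  ccv_cnnp_dense(256, 0 /* with bias */, 0 /* flags */, 1, "fc1"), MODEL_IO_LIST(x));
ccv_cnnp_model_io_t const y = ccv_cnnp_model_apply(
  ccv_cnnp_dense(10, 1 /* no bias */, 0, 1, "fc2"), MODEL_IO_LIST(h));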
1475
// MARK - Pool Layers
1476
1477
typedef struct {
1478
  ccv_cnnp_model_t super;
1479
  ccv_nnc_tensor_symbol_t output;
1480
  int kdim[CCV_NNC_MAX_DIM_ALLOC];
1481
  ccv_nnc_hint_t hint;
1482
} ccv_cnnp_model_pool_t;
1483
1484
static void _ccv_cnnp_max_pool_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1485
18
{
1486
18
  ccv_cnnp_model_pool_t* const self = (ccv_cnnp_model_pool_t*)super;
1487
18
  PRINT(CCV_CLI_VERBOSE, "[cnnp_max_pool_build] -\n");
1488
18
  assert(input_size == 1);
1489
18
  assert(output_size == 1);
1490
18
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1491
18
  const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim), CCV_NNC_MAX_DIM);
1492
18
  ccv_nnc_cmd_t cmd;
1493
18
  if (hw >= 0 && self->kdim[0] == 0 && self->kdim[1] == 0)
1494
3
    cmd = CMD_MAX_POOL_FORWARD(params.dim[hw], params.dim[hw + 1]);
1495
15
  else
1496
15
    cmd = CMD_MAX_POOL_FORWARD(self->kdim[0], self->kdim[1]);
1497
18
  ccv_nnc_tensor_param_t output_params;
1498
18
  ccv_nnc_hint_tensor_auto(cmd, &params, 1, self->hint, &output_params, 1);
1499
18
  const ccv_nnc_tensor_symbol_t pool_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1500
18
  const ccv_nnc_graph_exec_symbol_t exec = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(pool_output), "max_pool");
1501
18
  ccv_nnc_graph_exec_symbol_set_hint(graph, exec, self->hint);
1502
18
  outputs[0] = pool_output;
1503
18
}
1504
1505
static ccv_cnnp_model_t* _ccv_cnnp_max_pool_copy(const ccv_cnnp_model_t* const super, void* const context);
1506
1507
static const ccv_cnnp_model_vtab_t ccv_cnnp_max_pool_isa = {
1508
  .build = _ccv_cnnp_max_pool_build,
1509
  .copy = _ccv_cnnp_max_pool_copy,
1510
};
1511
1512
ccv_cnnp_model_t* ccv_cnnp_max_pool(const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_hint_t hint, const char* const name)
1513
24
{
1514
24
  ccv_cnnp_model_pool_t* const model_pool = (ccv_cnnp_model_pool_t*)cccalloc(1, sizeof(ccv_cnnp_model_pool_t));
1515
24
  model_pool->super.isa = &ccv_cnnp_max_pool_isa;
1516
24
  model_pool->super.input_size = 1;
1517
24
  model_pool->super.outputs = &model_pool->output;
1518
24
  model_pool->super.output_size = 1;
1519
24
  ccv_cnnp_model_copy_name(&model_pool->super, name);
1520
24
  memcpy(model_pool->kdim, kdim, sizeof(model_pool->kdim));
1521
24
  model_pool->hint = hint;
1522
24
  return (ccv_cnnp_model_t*)model_pool;
1523
24
}
1524
1525
static ccv_cnnp_model_t* _ccv_cnnp_max_pool_copy(const ccv_cnnp_model_t* const super, void* const context)
1526
6
{
1527
6
  const ccv_cnnp_model_pool_t* const self = (const ccv_cnnp_model_pool_t*)super;
1528
6
  return ccv_cnnp_max_pool(self->kdim, self->hint, self->super.name);
1529
6
}
1530
1531
static void _ccv_cnnp_average_pool_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1532
15
{
1533
15
  ccv_cnnp_model_pool_t* const self = (ccv_cnnp_model_pool_t*)super;
1534
15
  PRINT(CCV_CLI_VERBOSE, "[cnnp_average_pool_build] -\n");
1535
15
  assert(input_size == 1);
1536
15
  assert(output_size == 1);
1537
15
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1538
15
  const int hw = ccv_nnc_tensor_hw(params, ccv_nnc_tensor_nd(params.dim), CCV_NNC_MAX_DIM);
1539
15
  ccv_nnc_cmd_t cmd;
1540
15
  if (hw >= 0 && self->kdim[0] == 0 && self->kdim[1] == 0)
1541
2
    cmd = CMD_AVERAGE_POOL_FORWARD(params.dim[hw], params.dim[hw + 1]);
1542
13
  else
1543
13
    cmd = CMD_AVERAGE_POOL_FORWARD(self->kdim[0], self->kdim[1]);
1544
15
  ccv_nnc_tensor_param_t output_params;
1545
15
  ccv_nnc_hint_tensor_auto(cmd, &params, 1, self->hint, &output_params, 1);
1546
15
  const ccv_nnc_tensor_symbol_t pool_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1547
15
  const ccv_nnc_graph_exec_symbol_t exec = ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(pool_output), "average_pool");
1548
15
  ccv_nnc_graph_exec_symbol_set_hint(graph, exec, self->hint);
1549
15
  outputs[0] = pool_output;
1550
15
}
1551
1552
static ccv_cnnp_model_t* _ccv_cnnp_average_pool_copy(const ccv_cnnp_model_t* const super, void* const context);
1553
1554
static const ccv_cnnp_model_vtab_t ccv_cnnp_average_pool_isa = {
1555
  .build = _ccv_cnnp_average_pool_build,
1556
  .copy = _ccv_cnnp_average_pool_copy,
1557
};
1558
1559
ccv_cnnp_model_t* ccv_cnnp_average_pool(const int kdim[CCV_NNC_MAX_DIM_ALLOC], const ccv_nnc_hint_t hint, const char* const name)
1560
17
{
1561
17
  ccv_cnnp_model_pool_t* const model_pool = (ccv_cnnp_model_pool_t*)cccalloc(1, sizeof(ccv_cnnp_model_pool_t));
1562
17
  model_pool->super.isa = &ccv_cnnp_average_pool_isa;
1563
17
  model_pool->super.input_size = 1;
1564
17
  model_pool->super.outputs = &model_pool->output;
1565
17
  model_pool->super.output_size = 1;
1566
17
  ccv_cnnp_model_copy_name(&model_pool->super, name);
1567
17
  memcpy(model_pool->kdim, kdim, sizeof(model_pool->kdim));
1568
17
  model_pool->hint = hint;
1569
17
  return (ccv_cnnp_model_t*)model_pool;
1570
17
}
1571
1572
static ccv_cnnp_model_t* _ccv_cnnp_average_pool_copy(const ccv_cnnp_model_t* const super, void* const context)
1573
2
{
1574
2
  const ccv_cnnp_model_pool_t* const self = (const ccv_cnnp_model_pool_t*)super;
1575
2
  return ccv_cnnp_average_pool(self->kdim, self->hint, self->super.name);
1576
2
}
1577
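// Both pool builds treat an all-zero kdim as global pooling: when hw >= 0 and
// kdim[0] == kdim[1] == 0, the window becomes the full spatial extent of the
// input. A sketch of a global average pool (name illustrative):
ccv_cnnp_model_t* const gap = ccv_cnnp_average_pool(DIM_ALLOC(), ccv_nnc_no_hint, "global_avg");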
1578
// MARK - ReLU Layer
1579
1580
typedef struct {
1581
  ccv_cnnp_model_t super;
1582
  ccv_nnc_tensor_symbol_t output;
1583
} ccv_cnnp_model_relu_t;
1584
1585
static void _ccv_cnnp_relu_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1586
103
{
1587
103
  PRINT(CCV_CLI_VERBOSE, "[cnnp_relu_build] -\n");
1588
103
  assert(input_size == 1);
1589
103
  assert(output_size == 1);
1590
103
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1591
103
  ccv_nnc_tensor_param_t output_params;
1592
103
  const ccv_nnc_cmd_t relu = CMD_RELU_FORWARD();
1593
103
  ccv_nnc_hint_tensor_auto(relu, (ccv_nnc_tensor_param_t []){
1594
103
      params,
1595
103
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1596
103
  const ccv_nnc_tensor_symbol_t relu_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1597
103
  ccv_nnc_graph_exec_symbol_new(graph, relu, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(relu_output), "relu");
1598
103
  outputs[0] = relu_output;
1599
103
}
1600
1601
static ccv_cnnp_model_t* _ccv_cnnp_relu_copy(const ccv_cnnp_model_t* const self, void* const context);
1602
1603
static const ccv_cnnp_model_vtab_t ccv_cnnp_relu_isa = {
1604
  .build = _ccv_cnnp_relu_build,
1605
  .copy = _ccv_cnnp_relu_copy,
1606
};
1607
1608
ccv_cnnp_model_t* ccv_cnnp_relu(const char* const name)
1609
120
{
1610
120
  ccv_cnnp_model_relu_t* const model_relu = (ccv_cnnp_model_relu_t*)cccalloc(1, sizeof(ccv_cnnp_model_relu_t));
1611
120
  model_relu->super.isa = &ccv_cnnp_relu_isa;
1612
120
  model_relu->super.input_size = 1;
1613
120
  model_relu->super.outputs = &model_relu->output;
1614
120
  model_relu->super.output_size = 1;
1615
120
  ccv_cnnp_model_copy_name(&model_relu->super, name);
1616
120
  return (ccv_cnnp_model_t*)model_relu;
1617
120
}
1618
1619
static ccv_cnnp_model_t* _ccv_cnnp_relu_copy(const ccv_cnnp_model_t* const self, void* const context)
1620
17
{
1621
17
  return ccv_cnnp_relu(self->name);
1622
17
}
1623
1624
// MARK - Sigmoid Layer
1625
1626
typedef struct {
1627
  ccv_cnnp_model_t super;
1628
  ccv_nnc_tensor_symbol_t output;
1629
} ccv_cnnp_model_sigmoid_t;
1630
1631
static void _ccv_cnnp_sigmoid_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1632
5
{
1633
5
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sigmoid_build] -\n");
1634
5
  assert(input_size == 1);
1635
5
  assert(output_size == 1);
1636
5
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1637
5
  ccv_nnc_tensor_param_t output_params;
1638
5
  const ccv_nnc_cmd_t sigmoid = CMD_SIGMOID_FORWARD();
1639
5
  ccv_nnc_hint_tensor_auto(sigmoid, (ccv_nnc_tensor_param_t []){
1640
5
      params,
1641
5
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1642
5
  const ccv_nnc_tensor_symbol_t sigmoid_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1643
5
  ccv_nnc_graph_exec_symbol_new(graph, sigmoid, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(sigmoid_output), "sigmoid");
1644
5
  outputs[0] = sigmoid_output;
1645
5
}
1646
1647
static ccv_cnnp_model_t* _ccv_cnnp_sigmoid_copy(const ccv_cnnp_model_t* const self, void* const context);
1648
1649
static const ccv_cnnp_model_vtab_t ccv_cnnp_sigmoid_isa = {
1650
  .build = _ccv_cnnp_sigmoid_build,
1651
  .copy = _ccv_cnnp_sigmoid_copy,
1652
};
1653
1654
ccv_cnnp_model_t* ccv_cnnp_sigmoid(const char* const name)
1655
5
{
1656
5
  ccv_cnnp_model_sigmoid_t* const model_sigmoid = (ccv_cnnp_model_sigmoid_t*)cccalloc(1, sizeof(ccv_cnnp_model_sigmoid_t));
1657
5
  model_sigmoid->super.isa = &ccv_cnnp_sigmoid_isa;
1658
5
  model_sigmoid->super.input_size = 1;
1659
5
  model_sigmoid->super.outputs = &model_sigmoid->output;
1660
5
  model_sigmoid->super.output_size = 1;
1661
5
  ccv_cnnp_model_copy_name(&model_sigmoid->super, name);
1662
5
  return (ccv_cnnp_model_t*)model_sigmoid;
1663
5
}
1664
1665
static ccv_cnnp_model_t* _ccv_cnnp_sigmoid_copy(const ccv_cnnp_model_t* const self, void* const context)
1666
0
{
1667
0
  return ccv_cnnp_sigmoid(self->name);
1668
0
}
1669
1670
// MARK - Tanh Layer
1671
1672
typedef struct {
1673
  ccv_cnnp_model_t super;
1674
  ccv_nnc_tensor_symbol_t output;
1675
} ccv_cnnp_model_tanh_t;
1676
1677
static void _ccv_cnnp_tanh_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1678
0
{
1679
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_tanh_build] -\n");
1680
0
  assert(input_size == 1);
1681
0
  assert(output_size == 1);
1682
0
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1683
0
  ccv_nnc_tensor_param_t output_params;
1684
0
  const ccv_nnc_cmd_t tanh = CMD_TANH_FORWARD();
1685
0
  ccv_nnc_hint_tensor_auto(tanh, (ccv_nnc_tensor_param_t []){
1686
0
      params,
1687
0
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1688
0
  const ccv_nnc_tensor_symbol_t tanh_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1689
0
  ccv_nnc_graph_exec_symbol_new(graph, tanh, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(tanh_output), "tanh");
1690
0
  outputs[0] = tanh_output;
1691
0
}
1692
1693
static ccv_cnnp_model_t* _ccv_cnnp_tanh_copy(const ccv_cnnp_model_t* const self, void* const context);
1694
1695
static const ccv_cnnp_model_vtab_t ccv_cnnp_tanh_isa = {
1696
  .build = _ccv_cnnp_tanh_build,
1697
  .copy = _ccv_cnnp_tanh_copy,
1698
};
1699
1700
ccv_cnnp_model_t* ccv_cnnp_tanh(const char* const name)
1701
0
{
1702
0
  ccv_cnnp_model_tanh_t* const model_tanh = (ccv_cnnp_model_tanh_t*)cccalloc(1, sizeof(ccv_cnnp_model_tanh_t));
1703
0
  model_tanh->super.isa = &ccv_cnnp_tanh_isa;
1704
0
  model_tanh->super.input_size = 1;
1705
0
  model_tanh->super.outputs = &model_tanh->output;
1706
0
  model_tanh->super.output_size = 1;
1707
0
  ccv_cnnp_model_copy_name(&model_tanh->super, name);
1708
0
  return (ccv_cnnp_model_t*)model_tanh;
1709
0
}
1710
1711
static ccv_cnnp_model_t* _ccv_cnnp_tanh_copy(const ccv_cnnp_model_t* const self, void* const context)
1712
0
{
1713
0
  return ccv_cnnp_tanh(self->name);
1714
0
}
1715
1716
// MARK - Swish Layer
1717
1718
typedef struct {
1719
  ccv_cnnp_model_t super;
1720
  ccv_nnc_tensor_symbol_t output;
1721
} ccv_cnnp_model_swish_t;
1722
1723
static void _ccv_cnnp_swish_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1724
0
{
1725
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_swish_build] -\n");
1726
0
  assert(input_size == 1);
1727
0
  assert(output_size == 1);
1728
0
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1729
0
  ccv_nnc_tensor_param_t output_params;
1730
0
  const ccv_nnc_cmd_t swish = CMD_SWISH_FORWARD();
1731
0
  ccv_nnc_hint_tensor_auto(swish, (ccv_nnc_tensor_param_t []){
1732
0
      params,
1733
0
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1734
0
  const ccv_nnc_tensor_symbol_t swish_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1735
0
  ccv_nnc_graph_exec_symbol_new(graph, swish, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(swish_output), "swish");
1736
0
  outputs[0] = swish_output;
1737
0
}
1738
1739
static ccv_cnnp_model_t* _ccv_cnnp_swish_copy(const ccv_cnnp_model_t* const self, void* const context);
1740
1741
static const ccv_cnnp_model_vtab_t ccv_cnnp_swish_isa = {
1742
  .build = _ccv_cnnp_swish_build,
1743
  .copy = _ccv_cnnp_swish_copy,
1744
};
1745
1746
ccv_cnnp_model_t* ccv_cnnp_swish(const char* const name)
1747
0
{
1748
0
  ccv_cnnp_model_swish_t* const model_swish = (ccv_cnnp_model_swish_t*)cccalloc(1, sizeof(ccv_cnnp_model_swish_t));
1749
0
  model_swish->super.isa = &ccv_cnnp_swish_isa;
1750
0
  model_swish->super.input_size = 1;
1751
0
  model_swish->super.outputs = &model_swish->output;
1752
0
  model_swish->super.output_size = 1;
1753
0
  ccv_cnnp_model_copy_name(&model_swish->super, name);
1754
0
  return (ccv_cnnp_model_t*)model_swish;
1755
0
}
1756
1757
static ccv_cnnp_model_t* _ccv_cnnp_swish_copy(const ccv_cnnp_model_t* const self, void* const context)
1758
0
{
1759
0
  return ccv_cnnp_swish(self->name);
1760
0
}
1761
1762
// MARK - GELU Layer
1763
1764
typedef struct {
1765
  ccv_cnnp_model_t super;
1766
  ccv_nnc_tensor_symbol_t output;
1767
  int tanh;
1768
} ccv_cnnp_model_gelu_t;
1769
1770
static void _ccv_cnnp_gelu_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1771
2
{
1772
2
  PRINT(CCV_CLI_VERBOSE, "[cnnp_gelu_build] -\n");
1773
2
  assert(input_size == 1);
1774
2
  assert(output_size == 1);
1775
2
  ccv_cnnp_model_gelu_t* const self = (ccv_cnnp_model_gelu_t*)super;
1776
2
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1777
2
  ccv_nnc_tensor_param_t output_params;
1778
2
  const ccv_nnc_cmd_t gelu = CMD_GELU_FORWARD(self->tanh);
1779
2
  ccv_nnc_hint_tensor_auto(gelu, (ccv_nnc_tensor_param_t []){
1780
2
      params,
1781
2
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1782
2
  const ccv_nnc_tensor_symbol_t gelu_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1783
2
  ccv_nnc_graph_exec_symbol_new(graph, gelu, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(gelu_output), "gelu");
1784
2
  outputs[0] = gelu_output;
1785
2
}
1786
1787
static ccv_cnnp_model_t* _ccv_cnnp_gelu_copy(const ccv_cnnp_model_t* const self, void* const context);
1788
1789
static const ccv_cnnp_model_vtab_t ccv_cnnp_gelu_isa = {
1790
  .build = _ccv_cnnp_gelu_build,
1791
  .copy = _ccv_cnnp_gelu_copy,
1792
};
1793
1794
ccv_cnnp_model_t* ccv_cnnp_gelu(const int tanh, const char* const name)
1795
1
{
1796
1
  ccv_cnnp_model_gelu_t* const model_gelu = (ccv_cnnp_model_gelu_t*)cccalloc(1, sizeof(ccv_cnnp_model_gelu_t));
1797
1
  model_gelu->super.isa = &ccv_cnnp_gelu_isa;
1798
1
  model_gelu->super.input_size = 1;
1799
1
  model_gelu->super.outputs = &model_gelu->output;
1800
1
  model_gelu->super.output_size = 1;
1801
1
  model_gelu->tanh = tanh;
1802
1
  ccv_cnnp_model_copy_name(&model_gelu->super, name);
1803
1
  return (ccv_cnnp_model_t*)model_gelu;
1804
1
}
1805
1806
static ccv_cnnp_model_t* _ccv_cnnp_gelu_copy(const ccv_cnnp_model_t* const super, void* const context)
1807
0
{
1808
0
  ccv_cnnp_model_gelu_t* const self = (ccv_cnnp_model_gelu_t*)super;
1809
0
  return ccv_cnnp_gelu(self->tanh, self->super.name);
1810
0
}
1811
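// The tanh flag selects the tanh approximation of GELU, commonly written as
//   GELU(x) ~= 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3))),
// as opposed to the exact erf form 0.5 * x * (1 + erf(x / sqrt(2))). A sketch:
ccv_cnnp_model_t* const gelu = ccv_cnnp_gelu(1 /* tanh approximation */, "gelu");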
1812
// MARK - Leaky ReLU Layer
1813
1814
typedef struct {
1815
  ccv_cnnp_model_t super;
1816
  ccv_nnc_tensor_symbol_t output;
1817
  float negative_slope;
1818
} ccv_cnnp_model_leaky_relu_t;
1819
1820
static void _ccv_cnnp_leaky_relu_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1821
0
{
1822
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_leaky_relu_build] -\n");
1823
0
  assert(input_size == 1);
1824
0
  assert(output_size == 1);
1825
0
  ccv_cnnp_model_leaky_relu_t* const self = (ccv_cnnp_model_leaky_relu_t*)super;
1826
0
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1827
0
  ccv_nnc_tensor_param_t output_params;
1828
0
  const ccv_nnc_cmd_t leaky_relu = CMD_LEAKY_RELU_FORWARD(self->negative_slope);
1829
0
  ccv_nnc_hint_tensor_auto(leaky_relu, (ccv_nnc_tensor_param_t []){
1830
0
      params,
1831
0
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1832
0
  const ccv_nnc_tensor_symbol_t leaky_relu_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1833
0
  ccv_nnc_graph_exec_symbol_new(graph, leaky_relu, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(leaky_relu_output), "leaky_relu");
1834
0
  outputs[0] = leaky_relu_output;
1835
0
}
1836
1837
static ccv_cnnp_model_t* _ccv_cnnp_leaky_relu_copy(const ccv_cnnp_model_t* const self, void* const context);
1838
1839
static const ccv_cnnp_model_vtab_t ccv_cnnp_leaky_relu_isa = {
1840
  .build = _ccv_cnnp_leaky_relu_build,
1841
  .copy = _ccv_cnnp_leaky_relu_copy,
1842
};
1843
1844
ccv_cnnp_model_t* ccv_cnnp_leaky_relu(const float negative_slope, const char* const name)
1845
0
{
1846
0
  ccv_cnnp_model_leaky_relu_t* const model_leaky_relu = (ccv_cnnp_model_leaky_relu_t*)cccalloc(1, sizeof(ccv_cnnp_model_leaky_relu_t));
1847
0
  model_leaky_relu->super.isa = &ccv_cnnp_leaky_relu_isa;
1848
0
  model_leaky_relu->super.input_size = 1;
1849
0
  model_leaky_relu->super.outputs = &model_leaky_relu->output;
1850
0
  model_leaky_relu->super.output_size = 1;
1851
0
  model_leaky_relu->negative_slope = negative_slope;
1852
0
  ccv_cnnp_model_copy_name(&model_leaky_relu->super, name);
1853
0
  return (ccv_cnnp_model_t*)model_leaky_relu;
1854
0
}
1855
1856
static ccv_cnnp_model_t* _ccv_cnnp_leaky_relu_copy(const ccv_cnnp_model_t* const super, void* const context)
1857
0
{
1858
0
  ccv_cnnp_model_leaky_relu_t* const self = (ccv_cnnp_model_leaky_relu_t*)super;
1859
0
  return ccv_cnnp_leaky_relu(self->negative_slope, self->super.name);
1860
0
}
1861
1862
// MARK - Softmax Layer
1863
1864
typedef struct {
1865
  ccv_cnnp_model_t super;
1866
  ccv_nnc_tensor_symbol_t output;
1867
} ccv_cnnp_model_softmax_t;
1868
1869
static void _ccv_cnnp_softmax_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1870
8
{
1871
8
  PRINT(CCV_CLI_VERBOSE, "[cnnp_softmax_build] -\n");
1872
8
  assert(input_size == 1);
1873
8
  assert(output_size == 1);
1874
8
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
1875
8
  ccv_nnc_tensor_param_t output_params;
1876
8
  const ccv_nnc_cmd_t softmax = CMD_SOFTMAX_FORWARD();
1877
8
  ccv_nnc_hint_tensor_auto(softmax, (ccv_nnc_tensor_param_t []){
1878
8
      params,
1879
8
    }, 1, ccv_nnc_no_hint, &output_params, 1);
1880
8
  const ccv_nnc_tensor_symbol_t softmax_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1881
8
  ccv_nnc_graph_exec_symbol_new(graph, softmax, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(softmax_output), "softmax");
1882
8
  outputs[0] = softmax_output;
1883
8
}
1884
1885
static ccv_cnnp_model_t* _ccv_cnnp_softmax_copy(const ccv_cnnp_model_t* const self, void* const context);
1886
1887
static const ccv_cnnp_model_vtab_t ccv_cnnp_softmax_isa = {
1888
  .build = _ccv_cnnp_softmax_build,
1889
  .copy = _ccv_cnnp_softmax_copy,
1890
};
1891
1892
ccv_cnnp_model_t* ccv_cnnp_softmax(const char* const name)
1893
9
{
1894
9
  ccv_cnnp_model_softmax_t* const model_softmax = (ccv_cnnp_model_softmax_t*)cccalloc(1, sizeof(ccv_cnnp_model_softmax_t));
1895
9
  model_softmax->super.isa = &ccv_cnnp_softmax_isa;
1896
9
  model_softmax->super.input_size = 1;
1897
9
  model_softmax->super.outputs = &model_softmax->output;
1898
9
  model_softmax->super.output_size = 1;
1899
9
  ccv_cnnp_model_copy_name(&model_softmax->super, name);
1900
9
  return (ccv_cnnp_model_t*)model_softmax;
1901
9
}
1902
1903
static ccv_cnnp_model_t* _ccv_cnnp_softmax_copy(const ccv_cnnp_model_t* const self, void* const context)
1904
1
{
1905
1
  return ccv_cnnp_softmax(self->name);
1906
1
}
1907
1908
// MARK - Add Layer
1909
1910
typedef struct {
1911
  ccv_cnnp_model_t super;
1912
  float p;
1913
  float q;
1914
  ccv_nnc_tensor_symbol_t output;
1915
} ccv_cnnp_model_add_t;
1916
1917
static void _ccv_cnnp_add_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1918
0
{
1919
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_add_build] -\n");
1920
0
  const ccv_cnnp_model_add_t* const self = (const ccv_cnnp_model_add_t*)super;
1921
0
  assert(input_size == 2);
1922
0
  assert(output_size == 1);
1923
0
  ccv_nnc_tensor_param_t input_params[2];
1924
0
  int i;
1925
0
  for (i = 0; i < 2; i++)
1926
0
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
1927
0
  ccv_nnc_tensor_param_t output_params;
1928
0
  const ccv_nnc_cmd_t add = CMD_ADD_FORWARD(self->p, self->q);
1929
0
  ccv_nnc_hint_tensor_auto(add, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
1930
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1931
0
  ccv_nnc_graph_exec_symbol_new(graph, add, inputs, input_size, outputs, output_size, "add");
1932
0
}
1933
1934
static ccv_cnnp_model_t* _ccv_cnnp_add_copy(const ccv_cnnp_model_t* const self, void* const context);
1935
1936
static const ccv_cnnp_model_vtab_t ccv_cnnp_add_isa = {
1937
  .build = _ccv_cnnp_add_build,
1938
  .copy = _ccv_cnnp_add_copy,
1939
};
1940
1941
ccv_cnnp_model_t* ccv_cnnp_add(const float p, const float q, const char* const name)
1942
0
{
1943
0
  ccv_cnnp_model_add_t* const model_add = (ccv_cnnp_model_add_t*)cccalloc(1, sizeof(ccv_cnnp_model_add_t));
1944
0
  model_add->super.isa = &ccv_cnnp_add_isa;
1945
0
  model_add->super.input_size = 2;
1946
0
  model_add->super.outputs = &model_add->output;
1947
0
  model_add->super.output_size = 1;
1948
0
  model_add->p = p;
1949
0
  model_add->q = q;
1950
0
  ccv_cnnp_model_copy_name(&model_add->super, name);
1951
0
  return (ccv_cnnp_model_t*)model_add;
1952
0
}
1953
1954
static ccv_cnnp_model_t* _ccv_cnnp_add_copy(const ccv_cnnp_model_t* const super, void* const context)
1955
0
{
1956
0
  const ccv_cnnp_model_add_t* const self = (const ccv_cnnp_model_add_t*)super;
1957
0
  return ccv_cnnp_add(self->p, self->q, self->super.name);
1958
0
}
1959
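// CMD_ADD_FORWARD(p, q) computes the weighted sum p * a + q * b, so this layer
// doubles as a residual merge (p = q = 1) or a fixed-weight interpolation. A
// sketch assuming two hypothetical upstream model-io nodes, trunk and shortcut:
ccv_cnnp_model_io_t const sum = ccv_cnnp_model_apply(
  ccv_cnnp_add(1, 1, "residual"), MODEL_IO_LIST(trunk, shortcut));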
1960
// MARK - Mul Layer
1961
1962
typedef struct {
1963
  ccv_cnnp_model_t super;
1964
  ccv_nnc_tensor_symbol_t output;
1965
  float p;
1966
} ccv_cnnp_model_mul_t;
1967
1968
static void _ccv_cnnp_mul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
1969
6
{
1970
6
  PRINT(CCV_CLI_VERBOSE, "[cnnp_mul_build] -\n");
1971
6
  const ccv_cnnp_model_mul_t* const self = (const ccv_cnnp_model_mul_t*)super;
1972
6
  assert(input_size == 2);
1973
6
  assert(output_size == 1);
1974
6
  ccv_nnc_tensor_param_t input_params[2];
1975
6
  int i;
1976
18
  for (i = 0; i < 2; i++)
1977
12
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
1978
6
  ccv_nnc_tensor_param_t output_params;
1979
6
  const ccv_nnc_cmd_t mul = CMD_MUL_FORWARD(self->p);
1980
6
  ccv_nnc_hint_tensor_auto(mul, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
1981
6
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
1982
6
  ccv_nnc_graph_exec_symbol_new(graph, mul, inputs, input_size, outputs, output_size, "mul");
1983
6
}
1984
1985
static ccv_cnnp_model_t* _ccv_cnnp_mul_copy(const ccv_cnnp_model_t* const self, void* const context);
1986
1987
static const ccv_cnnp_model_vtab_t ccv_cnnp_mul_isa = {
1988
  .build = _ccv_cnnp_mul_build,
1989
  .copy = _ccv_cnnp_mul_copy,
1990
};
1991
1992
ccv_cnnp_model_t* ccv_cnnp_mul(const float p, const char* const name)
1993
5
{
1994
5
  ccv_cnnp_model_mul_t* const model_mul = (ccv_cnnp_model_mul_t*)cccalloc(1, sizeof(ccv_cnnp_model_mul_t));
1995
5
  model_mul->super.isa = &ccv_cnnp_mul_isa;
1996
5
  model_mul->super.input_size = 2;
1997
5
  model_mul->super.outputs = &model_mul->output;
1998
5
  model_mul->super.output_size = 1;
1999
5
  model_mul->p = p;
2000
5
  ccv_cnnp_model_copy_name(&model_mul->super, name);
2001
5
  return (ccv_cnnp_model_t*)model_mul;
2002
5
}
2003
2004
static ccv_cnnp_model_t* _ccv_cnnp_mul_copy(const ccv_cnnp_model_t* const super, void* const context)
2005
0
{
2006
0
  const ccv_cnnp_model_mul_t* const self = (const ccv_cnnp_model_mul_t*)super;
2007
0
  return ccv_cnnp_mul(self->p, self->super.name);
2008
0
}
2009
2010
// MARK - Scalar Mul Layer
2011
2012
typedef struct {
2013
  ccv_cnnp_model_t super;
2014
  ccv_nnc_tensor_symbol_t output;
2015
  float a;
2016
} ccv_cnnp_model_scalar_mul_t;
2017
2018
static void _ccv_cnnp_scalar_mul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2019
4
{
2020
4
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scalar_mul_build] -\n");
2021
4
  assert(input_size == 1);
2022
4
  assert(output_size == 1);
2023
4
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2024
4
  ccv_nnc_tensor_param_t output_params;
2025
4
  ccv_cnnp_model_scalar_mul_t* const self = (ccv_cnnp_model_scalar_mul_t*)super;
2026
4
  const ccv_nnc_cmd_t scalar_mul = CMD_SCALAR_MUL_FORWARD(self->a);
2027
4
  ccv_nnc_hint_tensor_auto(scalar_mul, (ccv_nnc_tensor_param_t []){
2028
4
      params,
2029
4
    }, 1, ccv_nnc_no_hint, &output_params, 1);
2030
4
  const ccv_nnc_tensor_symbol_t scalar_mul_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2031
4
  ccv_nnc_graph_exec_symbol_new(graph, scalar_mul, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(scalar_mul_output), "scalar_mul");
2032
4
  outputs[0] = scalar_mul_output;
2033
4
}
2034
2035
static ccv_cnnp_model_t* _ccv_cnnp_scalar_mul_copy(const ccv_cnnp_model_t* const super, void* const context);
2036
2037
static const ccv_cnnp_model_vtab_t ccv_cnnp_scalar_mul_isa = {
2038
  .build = _ccv_cnnp_scalar_mul_build,
2039
  .copy = _ccv_cnnp_scalar_mul_copy,
2040
};
2041
2042
ccv_cnnp_model_t* ccv_cnnp_scalar_mul(const float a, const char* const name)
2043
4
{
2044
4
  ccv_cnnp_model_scalar_mul_t* const model_scalar_mul = (ccv_cnnp_model_scalar_mul_t*)cccalloc(1, sizeof(ccv_cnnp_model_scalar_mul_t));
2045
4
  model_scalar_mul->super.isa = &ccv_cnnp_scalar_mul_isa;
2046
4
  model_scalar_mul->super.input_size = 1;
2047
4
  model_scalar_mul->super.outputs = &model_scalar_mul->output;
2048
4
  model_scalar_mul->super.output_size = 1;
2049
4
  model_scalar_mul->a = a;
2050
4
  ccv_cnnp_model_copy_name(&model_scalar_mul->super, name);
2051
4
  return (ccv_cnnp_model_t*)model_scalar_mul;
2052
4
}
2053
2054
static ccv_cnnp_model_t* _ccv_cnnp_scalar_mul_copy(const ccv_cnnp_model_t* const super, void* const context)
2055
0
{
2056
0
  const ccv_cnnp_model_scalar_mul_t* const self = (const ccv_cnnp_model_scalar_mul_t*)super;
2057
0
  return ccv_cnnp_scalar_mul(self->a, self->super.name);
2058
0
}
2059
2060
// MARK - Div Layer
2061
2062
typedef struct {
2063
  ccv_cnnp_model_t super;
2064
  ccv_nnc_tensor_symbol_t output;
2065
  int reciprocal;
2066
} ccv_cnnp_model_div_t;
2067
2068
static void _ccv_cnnp_div_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2069
2
{
2070
2
  const ccv_cnnp_model_div_t* const self = (const ccv_cnnp_model_div_t*)super;
2071
2
  PRINT(CCV_CLI_VERBOSE, "[cnnp_div_build] -\n");
2072
2
  assert(output_size == 1);
2073
2
  ccv_nnc_tensor_param_t input_params[2];
2074
2
  int i;
2075
2
  ccv_nnc_tensor_param_t output_params;
2076
2
  const ccv_nnc_cmd_t div = CMD_EWDIV_FORWARD();
2077
2
  if (self->reciprocal)
2078
1
  {
2079
1
    assert(input_size == 1);
2080
1
    input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2081
1
    input_params[1] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2082
1
    ccv_nnc_hint_tensor_auto(div, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
2083
1
    outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2084
1
    ccv_nnc_graph_exec_symbol_new(graph, div, TENSOR_SYMBOL_LIST(NO_TENSOR_SYMBOL, inputs[0]), outputs, output_size, "div");
2085
1
  } else {
2086
1
    assert(input_size == 2);
2087
3
    for (i = 0; i < 2; i++)
2088
2
      input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
2089
1
    ccv_nnc_hint_tensor_auto(div, input_params, input_size, ccv_nnc_no_hint, &output_params, 1);
2090
1
    outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2091
1
    ccv_nnc_graph_exec_symbol_new(graph, div, inputs, input_size, outputs, output_size, "div");
2092
1
  }
2093
2
}
2094
2095
static ccv_cnnp_model_t* _ccv_cnnp_div_copy(const ccv_cnnp_model_t* const self, void* const context);
2096
2097
static const ccv_cnnp_model_vtab_t ccv_cnnp_div_isa = {
2098
  .build = _ccv_cnnp_div_build,
2099
  .copy = _ccv_cnnp_div_copy,
2100
};
2101
2102
ccv_cnnp_model_t* ccv_cnnp_div(const int reciprocal, const char* const name)
2103
2
{
2104
2
  ccv_cnnp_model_div_t* const model_div = (ccv_cnnp_model_div_t*)cccalloc(1, sizeof(ccv_cnnp_model_div_t));
2105
2
  model_div->super.isa = &ccv_cnnp_div_isa;
2106
2
  model_div->super.input_size = reciprocal ? 1 : 2;
2107
2
  model_div->super.outputs = &model_div->output;
2108
2
  model_div->super.output_size = 1;
2109
2
  model_div->reciprocal = reciprocal;
2110
2
  ccv_cnnp_model_copy_name(&model_div->super, name);
2111
2
  return (ccv_cnnp_model_t*)model_div;
2112
2
}
2113
2114
static ccv_cnnp_model_t* _ccv_cnnp_div_copy(const ccv_cnnp_model_t* const super, void* const context)
2115
0
{
2116
0
  const ccv_cnnp_model_div_t* const self = (const ccv_cnnp_model_div_t*)super;
2117
0
  return ccv_cnnp_div(self->reciprocal, self->super.name);
2118
0
}
2119
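// With reciprocal set, the build passes (NO_TENSOR_SYMBOL, x) to EWDIV, which
// the command treats as 1 / x, turning this into a one-input elementwise
// reciprocal. A sketch of both modes (names illustrative):
ccv_cnnp_model_t* const ratio = ccv_cnnp_div(0, "a_over_b"); // two inputs: a / b
ccv_cnnp_model_t* const recip = ccv_cnnp_div(1, "recip"); // one input: 1 / x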
2120
// MARK - Sqrt Layer
2121
2122
typedef struct {
2123
  ccv_cnnp_model_t super;
2124
  ccv_nnc_tensor_symbol_t output;
2125
} ccv_cnnp_model_sqrt_t;
2126
2127
static void _ccv_cnnp_sqrt_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2128
0
{
2129
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sqrt_build] -\n");
2130
0
  assert(output_size == 1);
2131
0
  ccv_nnc_tensor_param_t input_params[1];
2132
0
  ccv_nnc_tensor_param_t output_params;
2133
0
  const ccv_nnc_cmd_t sqrt = CMD_EWSQRT_FORWARD();
2134
0
  assert(input_size == 1);
2135
0
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2136
0
  ccv_nnc_hint_tensor_auto(sqrt, input_params, 1, ccv_nnc_no_hint, &output_params, 1);
2137
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2138
0
  ccv_nnc_graph_exec_symbol_new(graph, sqrt, inputs, 1, outputs, output_size, "sqrt");
2139
0
}
2140
2141
static ccv_cnnp_model_t* _ccv_cnnp_sqrt_copy(const ccv_cnnp_model_t* const self, void* const context);
2142
2143
static const ccv_cnnp_model_vtab_t ccv_cnnp_sqrt_isa = {
2144
  .build = _ccv_cnnp_sqrt_build,
2145
  .copy = _ccv_cnnp_sqrt_copy,
2146
};
2147
2148
ccv_cnnp_model_t* ccv_cnnp_sqrt(const char* const name)
2149
0
{
2150
0
  ccv_cnnp_model_sqrt_t* const model_sqrt = (ccv_cnnp_model_sqrt_t*)cccalloc(1, sizeof(ccv_cnnp_model_sqrt_t));
2151
0
  model_sqrt->super.isa = &ccv_cnnp_sqrt_isa;
2152
0
  model_sqrt->super.input_size = 1;
2153
0
  model_sqrt->super.outputs = &model_sqrt->output;
2154
0
  model_sqrt->super.output_size = 1;
2155
0
  ccv_cnnp_model_copy_name(&model_sqrt->super, name);
2156
0
  return (ccv_cnnp_model_t*)model_sqrt;
2157
0
}
2158
2159
static ccv_cnnp_model_t* _ccv_cnnp_sqrt_copy(const ccv_cnnp_model_t* const super, void* const context)
2160
0
{
2161
0
  const ccv_cnnp_model_sqrt_t* const self = (const ccv_cnnp_model_sqrt_t*)super;
2162
0
  return ccv_cnnp_sqrt(self->super.name);
2163
0
}
2164
2165
// MARK - Cmul Layer
2166
2167
typedef struct {
2168
  ccv_cnnp_model_t super;
2169
  ccv_nnc_tensor_symbol_t output;
2170
} ccv_cnnp_model_cmul_t;
2171
2172
static void _ccv_cnnp_cmul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2173
0
{
2174
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_cmul_build] -\n");
2175
0
  assert(input_size == 2);
2176
0
  assert(output_size == 1);
2177
0
  ccv_nnc_tensor_param_t input_params[2];
2178
0
  int i;
2179
0
  for (i = 0; i < 2; i++)
2180
0
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
2181
0
  ccv_nnc_tensor_param_t output_params;
2182
0
  const ccv_nnc_cmd_t mul = CMD_CMUL_FORWARD();
2183
0
  ccv_nnc_hint_tensor_auto(mul, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
2184
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2185
0
  ccv_nnc_graph_exec_symbol_new(graph, mul, inputs, input_size, outputs, output_size, "cmul");
2186
0
}
2187
2188
static ccv_cnnp_model_t* _ccv_cnnp_cmul_copy(const ccv_cnnp_model_t* const self, void* const context);
2189
2190
static const ccv_cnnp_model_vtab_t ccv_cnnp_cmul_isa = {
2191
  .build = _ccv_cnnp_cmul_build,
2192
  .copy = _ccv_cnnp_cmul_copy,
2193
};
2194
2195
ccv_cnnp_model_t* ccv_cnnp_cmul(const char* const name)
2196
0
{
2197
0
  ccv_cnnp_model_cmul_t* const model_cmul = (ccv_cnnp_model_cmul_t*)cccalloc(1, sizeof(ccv_cnnp_model_cmul_t));
2198
0
  model_cmul->super.isa = &ccv_cnnp_cmul_isa;
2199
0
  model_cmul->super.input_size = 2;
2200
0
  model_cmul->super.outputs = &model_cmul->output;
2201
0
  model_cmul->super.output_size = 1;
2202
0
  ccv_cnnp_model_copy_name(&model_cmul->super, name);
2203
0
  return (ccv_cnnp_model_t*)model_cmul;
2204
0
}
2205
2206
static ccv_cnnp_model_t* _ccv_cnnp_cmul_copy(const ccv_cnnp_model_t* const super, void* const context)
2207
0
{
2208
0
  return ccv_cnnp_cmul(super->name);
2209
0
}
2210
2211
// MARK - Transpose Layer
2212
2213
typedef struct {
2214
  ccv_cnnp_model_t super;
2215
  ccv_nnc_tensor_symbol_t output;
2216
  int transpose[2];
2217
} ccv_cnnp_model_transpose_t;
2218
2219
static void _ccv_cnnp_transpose_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2220
22
{
2221
22
  ccv_cnnp_model_transpose_t* const self = (ccv_cnnp_model_transpose_t*)super;
2222
22
  PRINT(CCV_CLI_VERBOSE, "[cnnp_transpose_build] (%d, %d)\n", self->transpose[0], self->transpose[1]);
2223
22
  assert(input_size == 1);
2224
22
  assert(output_size == 1);
2225
22
  if (self->transpose[0] == self->transpose[1])
2226
0
  {
2227
0
    outputs[0] = inputs[0];
2228
0
    return;
2229
0
  }
2230
22
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2231
22
  ccv_nnc_tensor_param_t output_params;
2232
22
  const ccv_nnc_cmd_t transpose = CMD_TRANSPOSE_FORWARD(self->transpose[0], self->transpose[1]);
2233
22
  ccv_nnc_hint_tensor_auto(transpose, (ccv_nnc_tensor_param_t []){
2234
22
      params,
2235
22
    }, 1, ccv_nnc_no_hint, &output_params, 1);
2236
22
  const ccv_nnc_tensor_symbol_t transpose_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2237
22
  ccv_nnc_graph_exec_symbol_new(graph, transpose, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(transpose_output), "transpose");
2238
22
  outputs[0] = transpose_output;
2239
22
}
2240
2241
static ccv_cnnp_model_t* _ccv_cnnp_transpose_copy(const ccv_cnnp_model_t* const super, void* const context);
2242
2243
static const ccv_cnnp_model_vtab_t ccv_cnnp_transpose_isa = {
2244
  .build = _ccv_cnnp_transpose_build,
2245
  .copy = _ccv_cnnp_transpose_copy,
2246
};
2247
2248
ccv_cnnp_model_t* ccv_cnnp_transpose(const int axis_a, const int axis_b, const char* const name)
2249
22
{
2250
22
  ccv_cnnp_model_transpose_t* const model_transpose = (ccv_cnnp_model_transpose_t*)cccalloc(1, sizeof(ccv_cnnp_model_transpose_t));
2251
22
  model_transpose->super.isa = &ccv_cnnp_transpose_isa;
2252
22
  model_transpose->super.input_size = 1;
2253
22
  model_transpose->super.outputs = &model_transpose->output;
2254
22
  model_transpose->super.output_size = 1;
2255
22
  model_transpose->transpose[0] = axis_a;
2256
22
  model_transpose->transpose[1] = axis_b;
2257
22
  ccv_cnnp_model_copy_name(&model_transpose->super, name);
2258
22
  return (ccv_cnnp_model_t*)model_transpose;
2259
22
}
2260
2261
static ccv_cnnp_model_t* _ccv_cnnp_transpose_copy(const ccv_cnnp_model_t* const super, void* const context)
2262
0
{
2263
0
  const ccv_cnnp_model_transpose_t* const self = (const ccv_cnnp_model_transpose_t*)super;
2264
0
  return ccv_cnnp_transpose(self->transpose[0], self->transpose[1], self->super.name);
2265
0
}
2266
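
A matching sketch for the transpose model, using the same assumed driver skeleton. Note the shortcut in the build function above: when axis_a equals axis_b, the input symbol is passed straight through and no command is emitted at all.

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      ccv_cnnp_model_t* const transpose = ccv_cnnp_transpose(1, 2, "transpose");
      const ccv_nnc_tensor_param_t params = CPU_TENSOR_NHWC(32F, 2, 3, 4);
      ccv_cnnp_model_compile(transpose, &params, 1, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
      ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4, 3), 0); // axes 1 and 2 swapped
      ccv_cnnp_model_evaluate(transpose, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(a), TENSOR_LIST(b), 0, 0);
      ccv_nnc_tensor_free(a); ccv_nnc_tensor_free(b);
      ccv_cnnp_model_free(transpose);
      return 0;
    }
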
2267
// MARK - Layer Norm Layer
2268
2269
typedef struct {
2270
  ccv_cnnp_model_t super;
2271
  ccv_nnc_tensor_symbol_t output;
2272
  ccv_nnc_tensor_symbol_t bias;
2273
  ccv_nnc_tensor_symbol_t scale;
2274
  ccv_nnc_cmd_param_t params;
2275
} ccv_cnnp_model_layer_norm_t;
2276
2277
static void _ccv_cnnp_layer_norm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2278
8
{
2279
8
  PRINT(CCV_CLI_VERBOSE, "[cnnp_layer_norm_build] -\n");
2280
8
  assert(input_size == 1);
2281
8
  assert(output_size == 1);
2282
8
  ccv_cnnp_model_layer_norm_t* const self = (ccv_cnnp_model_layer_norm_t*)super;
2283
8
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2284
8
  ccv_nnc_tensor_param_t bias_params = params;
2285
8
  const int nd = ccv_nnc_tensor_nd(params.dim);
2286
8
  int i;
2287
32
  for (i = 0; i < nd; i++)
2288
24
    bias_params.dim[i] = 1;
2289
16
  for (i = 0; i < self->params.lnorm.count; i++)
2290
8
    bias_params.dim[self->params.lnorm.axis[i]] = params.dim[self->params.lnorm.axis[i]];
2291
8
  if (self->params.lnorm.elementwise_affine)
2292
8
  {
2293
    // Both scale and bias are shared if this model is reused.
2294
8
    if (!self->scale.graph)
2295
8
      self->scale = ccv_nnc_tensor_symbol_new(graph, bias_params, "scale");
2296
8
    if (!self->bias.graph)
2297
8
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
2298
8
  }
2299
8
  const ccv_nnc_cmd_t layer_norm = ccv_nnc_cmd(CCV_NNC_LAYER_NORM_FORWARD, 0, self->params, 0);
2300
8
  ccv_nnc_tensor_param_t output_params[3];
2301
8
  if (self->params.lnorm.elementwise_affine)
2302
8
    ccv_nnc_hint_tensor_auto(layer_norm, (ccv_nnc_tensor_param_t []){
2303
8
        params,
2304
8
        bias_params,
2305
8
        bias_params,
2306
8
      }, 3, ccv_nnc_no_hint, output_params, 3);
2307
0
  else
2308
0
    ccv_nnc_hint_tensor_auto(layer_norm, (ccv_nnc_tensor_param_t []){
2309
0
        params,
2310
0
      }, 1, ccv_nnc_no_hint, output_params, 3);
2311
8
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
2312
8
  const ccv_nnc_tensor_symbol_t saved_mean = ccv_nnc_tensor_symbol_new(graph, output_params[1], "saved_mean");
2313
8
  const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, output_params[2], "saved_inv_std");
2314
8
  if (self->params.lnorm.elementwise_affine)
2315
8
    ccv_nnc_graph_exec_symbol_new(graph, layer_norm, TENSOR_SYMBOL_LIST(inputs[0], self->scale, self->bias), TENSOR_SYMBOL_LIST(output, saved_mean, saved_inv_std), "layer_norm");
2316
0
  else
2317
0
    ccv_nnc_graph_exec_symbol_new(graph, layer_norm, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(output, saved_mean, saved_inv_std), "layer_norm");
2318
8
  outputs[0] = output;
2319
8
}
2320
2321
static void _ccv_cnnp_layer_norm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
2322
8
{
2323
8
  ccv_cnnp_model_layer_norm_t* const self = (ccv_cnnp_model_layer_norm_t*)super;
2324
8
  if (self->scale.graph)
2325
8
    initializer(context, CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, self->scale);
2326
8
  if (self->bias.graph)
2327
8
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
2328
8
}
2329
2330
static void _ccv_cnnp_layer_norm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
2331
8
{
2332
8
  ccv_cnnp_model_layer_norm_t* const self = (ccv_cnnp_model_layer_norm_t*)super;
2333
8
  if (self->scale.graph)
2334
8
    add_to_array(parameters, self->scale, is_trainable);
2335
8
  if (self->bias.graph)
2336
8
    add_to_array(parameters, self->bias, is_trainable);
2337
8
}
2338
2339
static ccv_cnnp_model_t* _ccv_cnnp_layer_norm_copy(const ccv_cnnp_model_t* const super, void* const context);
2340
2341
static const ccv_cnnp_model_vtab_t ccv_cnnp_layer_norm_isa = {
2342
  .build = _ccv_cnnp_layer_norm_build,
2343
  .init_states = _ccv_cnnp_layer_norm_init_states,
2344
  .add_to_parameter = _ccv_cnnp_layer_norm_add_to_parameter,
2345
  .copy = _ccv_cnnp_layer_norm_copy,
2346
};
2347
2348
ccv_cnnp_model_t* ccv_cnnp_layer_norm(const float epsilon, const int axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int elementwise_affine, const int is_trainable, const char* const name)
2349
8
{
2350
8
  ccv_cnnp_model_layer_norm_t* const model_layer_norm = (ccv_cnnp_model_layer_norm_t*)cccalloc(1, sizeof(ccv_cnnp_model_layer_norm_t));
2351
8
  model_layer_norm->super.isa = &ccv_cnnp_layer_norm_isa;
2352
8
  model_layer_norm->super.input_size = 1;
2353
8
  model_layer_norm->super.outputs = &model_layer_norm->output;
2354
8
  model_layer_norm->super.output_size = 1;
2355
8
  model_layer_norm->super.is_trainable = is_trainable;
2356
8
  ccv_cnnp_model_copy_name(&model_layer_norm->super, name);
2357
8
  model_layer_norm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
2358
8
  model_layer_norm->scale.graph = 0;
2359
8
  model_layer_norm->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
2360
8
  model_layer_norm->bias.graph = 0;
2361
8
  model_layer_norm->params.lnorm.epsilon = epsilon;
2362
8
  model_layer_norm->params.lnorm.count = axis_count;
2363
8
  model_layer_norm->params.lnorm.elementwise_affine = elementwise_affine;
2364
8
  memcpy(model_layer_norm->params.lnorm.axis, axis, sizeof(int) * axis_count);
2365
8
  return (ccv_cnnp_model_t*)model_layer_norm;
2366
8
}
2367
2368
static ccv_cnnp_model_t* _ccv_cnnp_layer_norm_copy(const ccv_cnnp_model_t* const super, void* const context)
2369
0
{
2370
0
  const ccv_cnnp_model_layer_norm_t* const self = (const ccv_cnnp_model_layer_norm_t*)super;
2371
0
  return ccv_cnnp_layer_norm(self->params.lnorm.epsilon, self->params.lnorm.axis, self->params.lnorm.count, self->params.lnorm.elementwise_affine, self->super.is_trainable, self->super.name);
2372
0
}
2373
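
As the build function above shows, the scale and bias parameters keep a size of 1 in every dimension except the normalized axes, and init_states fills them with 1 and 0 on first use. A sketch normalizing the last axis of an assumed (2, 4, 8) input, with elementwise affine enabled:

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      const int axis[CCV_NNC_MAX_DIM_ALLOC] = {2}; // normalize over the last axis
      ccv_cnnp_model_t* const layer_norm = ccv_cnnp_layer_norm(1e-5, axis, 1, 1, 1, "layer_norm");
      const ccv_nnc_tensor_param_t params = CPU_TENSOR_NHWC(32F, 2, 4, 8);
      ccv_cnnp_model_compile(layer_norm, &params, 1, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
      ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, params, 0);
      // The first evaluate runs init_states: scale becomes 1, bias becomes 0.
      ccv_cnnp_model_evaluate(layer_norm, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(a), TENSOR_LIST(b), 0, 0);
      ccv_nnc_tensor_free(a); ccv_nnc_tensor_free(b);
      ccv_cnnp_model_free(layer_norm);
      return 0;
    }
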
2374
// MARK - Group Norm Layer
2375
2376
typedef struct {
2377
  ccv_cnnp_model_t super;
2378
  ccv_nnc_tensor_symbol_t output;
2379
  ccv_nnc_tensor_symbol_t bias;
2380
  ccv_nnc_tensor_symbol_t scale;
2381
  ccv_nnc_cmd_param_t params;
2382
} ccv_cnnp_model_group_norm_t;
2383
2384
static void _ccv_cnnp_group_norm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2385
0
{
2386
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_group_norm_build] -\n");
2387
0
  assert(input_size == 1);
2388
0
  assert(output_size == 1);
2389
0
  ccv_cnnp_model_group_norm_t* const self = (ccv_cnnp_model_group_norm_t*)super;
2390
0
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2391
0
  ccv_nnc_tensor_param_t bias_params = params;
2392
0
  const int nd = ccv_nnc_tensor_nd(params.dim);
2393
0
  int i;
2394
0
  for (i = 0; i < nd; i++)
2395
0
    bias_params.dim[i] = 1;
2396
0
  bias_params.dim[self->params.gnorm.group_axis] = params.dim[self->params.gnorm.group_axis];
2397
0
  if (self->params.gnorm.elementwise_affine)
2398
0
  {
2399
    // Both scale and bias are shared between if this model is reused.
2400
0
    if (!self->scale.graph)
2401
0
      self->scale = ccv_nnc_tensor_symbol_new(graph, bias_params, "scale");
2402
0
    if (!self->bias.graph)
2403
0
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
2404
0
  }
2405
0
  const ccv_nnc_cmd_t group_norm = ccv_nnc_cmd(CCV_NNC_GROUP_NORM_FORWARD, 0, self->params, 0);
2406
0
  ccv_nnc_tensor_param_t output_params[3];
2407
0
  if (self->params.gnorm.elementwise_affine)
2408
0
    ccv_nnc_hint_tensor_auto(group_norm, (ccv_nnc_tensor_param_t []){
2409
0
        params,
2410
0
        bias_params,
2411
0
        bias_params,
2412
0
      }, 3, ccv_nnc_no_hint, output_params, 3);
2413
0
  else
2414
0
    ccv_nnc_hint_tensor_auto(group_norm, (ccv_nnc_tensor_param_t []){
2415
0
        params,
2416
0
      }, 1, ccv_nnc_no_hint, output_params, 3);
2417
0
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
2418
0
  const ccv_nnc_tensor_symbol_t saved_mean = ccv_nnc_tensor_symbol_new(graph, output_params[1], "saved_mean");
2419
0
  const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, output_params[2], "saved_inv_std");
2420
0
  if (self->params.gnorm.elementwise_affine)
2421
0
    ccv_nnc_graph_exec_symbol_new(graph, group_norm, TENSOR_SYMBOL_LIST(inputs[0], self->scale, self->bias), TENSOR_SYMBOL_LIST(output, saved_mean, saved_inv_std), "group_norm");
2422
0
  else
2423
0
    ccv_nnc_graph_exec_symbol_new(graph, group_norm, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(output, saved_mean, saved_inv_std), "group_norm");
2424
0
  outputs[0] = output;
2425
0
}
2426
2427
static void _ccv_cnnp_group_norm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
2428
0
{
2429
0
  ccv_cnnp_model_group_norm_t* const self = (ccv_cnnp_model_group_norm_t*)super;
2430
0
  if (self->scale.graph)
2431
0
    initializer(context, CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, self->scale);
2432
0
  if (self->bias.graph)
2433
0
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
2434
0
}
2435
2436
static void _ccv_cnnp_group_norm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
2437
0
{
2438
0
  ccv_cnnp_model_group_norm_t* const self = (ccv_cnnp_model_group_norm_t*)super;
2439
0
  if (self->scale.graph)
2440
0
    add_to_array(parameters, self->scale, is_trainable);
2441
0
  if (self->bias.graph)
2442
0
    add_to_array(parameters, self->bias, is_trainable);
2443
0
}
2444
2445
static ccv_cnnp_model_t* _ccv_cnnp_group_norm_copy(const ccv_cnnp_model_t* const super, void* const context);
2446
2447
static const ccv_cnnp_model_vtab_t ccv_cnnp_group_norm_isa = {
2448
  .build = _ccv_cnnp_group_norm_build,
2449
  .init_states = _ccv_cnnp_group_norm_init_states,
2450
  .add_to_parameter = _ccv_cnnp_group_norm_add_to_parameter,
2451
  .copy = _ccv_cnnp_group_norm_copy,
2452
};
2453
2454
ccv_cnnp_model_t* ccv_cnnp_group_norm(const int group_axis, const int groups, const float epsilon, const int reduce_axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int elementwise_affine, const int is_trainable, const char* const name)
2455
0
{
2456
0
  ccv_cnnp_model_group_norm_t* const model_group_norm = (ccv_cnnp_model_group_norm_t*)cccalloc(1, sizeof(ccv_cnnp_model_group_norm_t));
2457
0
  model_group_norm->super.isa = &ccv_cnnp_group_norm_isa;
2458
0
  model_group_norm->super.input_size = 1;
2459
0
  model_group_norm->super.outputs = &model_group_norm->output;
2460
0
  model_group_norm->super.output_size = 1;
2461
0
  model_group_norm->super.is_trainable = is_trainable;
2462
0
  ccv_cnnp_model_copy_name(&model_group_norm->super, name);
2463
0
  model_group_norm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
2464
0
  model_group_norm->scale.graph = 0;
2465
0
  model_group_norm->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
2466
0
  model_group_norm->bias.graph = 0;
2467
0
  model_group_norm->params.gnorm.group_axis = group_axis;
2468
0
  model_group_norm->params.gnorm.groups = groups;
2469
0
  model_group_norm->params.gnorm.epsilon = epsilon;
2470
0
  model_group_norm->params.gnorm.reduce_count = axis_count;
2471
0
  model_group_norm->params.gnorm.elementwise_affine = elementwise_affine;
2472
0
  memcpy(model_group_norm->params.gnorm.reduce_axis, reduce_axis, sizeof(int) * axis_count);
2473
0
  return (ccv_cnnp_model_t*)model_group_norm;
2474
0
}
2475
2476
static ccv_cnnp_model_t* _ccv_cnnp_group_norm_copy(const ccv_cnnp_model_t* const super, void* const context)
2477
0
{
2478
0
  const ccv_cnnp_model_group_norm_t* const self = (const ccv_cnnp_model_group_norm_t*)super;
2479
0
  return ccv_cnnp_group_norm(self->params.gnorm.group_axis, self->params.gnorm.groups, self->params.gnorm.epsilon, self->params.gnorm.reduce_axis, self->params.gnorm.reduce_count, self->params.gnorm.elementwise_affine, self->super.is_trainable, self->super.name);
2480
0
}
2481
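
Group norm follows the same template, except the parameter shape keeps the full size only along group_axis. In this sketch the axis layout (16 channels on axis 0 split into 4 groups, statistics reduced over axes 1 and 2) is chosen for illustration, not taken from this file:

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      const int reduce_axis[CCV_NNC_MAX_DIM_ALLOC] = {1, 2};
      ccv_cnnp_model_t* const group_norm = ccv_cnnp_group_norm(0, 4, 1e-5, reduce_axis, 2, 1, 1, "group_norm");
      const ccv_nnc_tensor_param_t params = CPU_TENSOR_NHWC(32F, 16, 8, 8);
      ccv_cnnp_model_compile(group_norm, &params, 1, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
      ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, params, 0);
      ccv_cnnp_model_evaluate(group_norm, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(a), TENSOR_LIST(b), 0, 0);
      ccv_nnc_tensor_free(a); ccv_nnc_tensor_free(b);
      ccv_cnnp_model_free(group_norm);
      return 0;
    }
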
2482
// MARK - RMSNorm Layer
2483
2484
typedef struct {
2485
  ccv_cnnp_model_t super;
2486
  ccv_nnc_tensor_symbol_t output;
2487
  ccv_nnc_tensor_symbol_t scale;
2488
  ccv_nnc_cmd_param_t params;
2489
} ccv_cnnp_model_rmsnorm_t;
2490
2491
static void _ccv_cnnp_rmsnorm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2492
0
{
2493
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_rmsnorm_build] -\n");
2494
0
  assert(input_size == 1);
2495
0
  assert(output_size == 1);
2496
0
  ccv_cnnp_model_rmsnorm_t* const self = (ccv_cnnp_model_rmsnorm_t*)super;
2497
0
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2498
0
  ccv_nnc_tensor_param_t scale_params = params;
2499
0
  const int nd = ccv_nnc_tensor_nd(params.dim);
2500
0
  int i;
2501
0
  for (i = 0; i < nd; i++)
2502
0
    scale_params.dim[i] = 1;
2503
0
  for (i = 0; i < self->params.rmsnorm.count; i++)
2504
0
    scale_params.dim[self->params.rmsnorm.axis[i]] = params.dim[self->params.rmsnorm.axis[i]];
2505
  // The scale is shared if this model is reused.
2506
0
  if (!self->scale.graph)
2507
0
    self->scale = ccv_nnc_tensor_symbol_new(graph, scale_params, "scale");
2508
0
  const ccv_nnc_cmd_t rmsnorm = ccv_nnc_cmd(CCV_NNC_RMSNORM_FORWARD, 0, self->params, 0);
2509
0
  ccv_nnc_tensor_param_t output_params[2];
2510
0
  ccv_nnc_hint_tensor_auto(rmsnorm, (ccv_nnc_tensor_param_t []){
2511
0
      params,
2512
0
      scale_params,
2513
0
    }, 2, ccv_nnc_no_hint, output_params, 2);
2514
0
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
2515
0
  const ccv_nnc_tensor_symbol_t saved_inv_std = ccv_nnc_tensor_symbol_new(graph, output_params[1], "saved_inv_std");
2516
0
  ccv_nnc_graph_exec_symbol_new(graph, rmsnorm, TENSOR_SYMBOL_LIST(inputs[0], self->scale), TENSOR_SYMBOL_LIST(output, saved_inv_std), "rmsnorm");
2517
0
  outputs[0] = output;
2518
0
}
2519
2520
static void _ccv_cnnp_rmsnorm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
2521
0
{
2522
0
  ccv_cnnp_model_rmsnorm_t* const self = (ccv_cnnp_model_rmsnorm_t*)super;
2523
0
  if (self->scale.graph)
2524
0
    initializer(context, CMD_SET_FORWARD(1), ccv_nnc_no_hint, 0, 0, self->scale);
2525
0
}
2526
2527
static void _ccv_cnnp_rmsnorm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
2528
0
{
2529
0
  ccv_cnnp_model_rmsnorm_t* const self = (ccv_cnnp_model_rmsnorm_t*)super;
2530
0
  if (self->scale.graph)
2531
0
    add_to_array(parameters, self->scale, is_trainable);
2532
0
}
2533
2534
static ccv_cnnp_model_t* _ccv_cnnp_rmsnorm_copy(const ccv_cnnp_model_t* const super, void* const context);
2535
2536
static const ccv_cnnp_model_vtab_t ccv_cnnp_rmsnorm_isa = {
2537
  .build = _ccv_cnnp_rmsnorm_build,
2538
  .init_states = _ccv_cnnp_rmsnorm_init_states,
2539
  .add_to_parameter = _ccv_cnnp_rmsnorm_add_to_parameter,
2540
  .copy = _ccv_cnnp_rmsnorm_copy,
2541
};
2542
2543
ccv_cnnp_model_t* ccv_cnnp_rmsnorm(const float epsilon, const int axis[CCV_NNC_MAX_DIM_ALLOC], const int axis_count, const int is_trainable, const char* const name)
2544
0
{
2545
0
  ccv_cnnp_model_rmsnorm_t* const model_rmsnorm = (ccv_cnnp_model_rmsnorm_t*)cccalloc(1, sizeof(ccv_cnnp_model_rmsnorm_t));
2546
0
  model_rmsnorm->super.isa = &ccv_cnnp_rmsnorm_isa;
2547
0
  model_rmsnorm->super.input_size = 1;
2548
0
  model_rmsnorm->super.outputs = &model_rmsnorm->output;
2549
0
  model_rmsnorm->super.output_size = 1;
2550
0
  model_rmsnorm->super.is_trainable = is_trainable;
2551
0
  ccv_cnnp_model_copy_name(&model_rmsnorm->super, name);
2552
0
  model_rmsnorm->scale.d = CCV_NNC_NO_TENSOR_SYMBOL;
2553
0
  model_rmsnorm->scale.graph = 0;
2554
0
  model_rmsnorm->params.rmsnorm.epsilon = epsilon;
2555
0
  model_rmsnorm->params.rmsnorm.count = axis_count;
2556
0
  memcpy(model_rmsnorm->params.lnorm.axis, axis, sizeof(int) * axis_count);
2557
0
  return (ccv_cnnp_model_t*)model_rmsnorm;
2558
0
}
2559
2560
static ccv_cnnp_model_t* _ccv_cnnp_rmsnorm_copy(const ccv_cnnp_model_t* const super, void* const context)
2561
0
{
2562
0
  const ccv_cnnp_model_rmsnorm_t* const self = (const ccv_cnnp_model_rmsnorm_t*)super;
2563
0
  return ccv_cnnp_rmsnorm(self->params.rmsnorm.epsilon, self->params.rmsnorm.axis, self->params.rmsnorm.count, self->super.is_trainable, self->super.name);
2564
0
}
2565
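
RMSNorm rescales by the root mean square only: there is no mean subtraction and no bias, which is why this model carries a single scale symbol, initialized to 1 by init_states above. A usage sketch with assumed shapes:

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      const int axis[CCV_NNC_MAX_DIM_ALLOC] = {2};
      ccv_cnnp_model_t* const rmsnorm = ccv_cnnp_rmsnorm(1e-6, axis, 1, 1, "rmsnorm");
      const ccv_nnc_tensor_param_t params = CPU_TENSOR_NHWC(32F, 2, 4, 8);
      ccv_cnnp_model_compile(rmsnorm, &params, 1, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
      ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, params, 0);
      ccv_cnnp_model_evaluate(rmsnorm, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(a), TENSOR_LIST(b), 0, 0);
      ccv_nnc_tensor_free(a); ccv_nnc_tensor_free(b);
      ccv_cnnp_model_free(rmsnorm);
      return 0;
    }
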
2566
// MARK - Batched Matrix Mul Layer
2567
2568
typedef struct {
2569
  ccv_cnnp_model_t super;
2570
  ccv_nnc_tensor_symbol_t output;
2571
  int transpose_a[2];
2572
  int transpose_b[2];
2573
  int flags;
2574
} ccv_cnnp_model_matmul_t;
2575
2576
static void _ccv_cnnp_matmul_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2577
10
{
2578
10
  PRINT(CCV_CLI_VERBOSE, "[cnnp_matmul_build] -\n");
2579
10
  assert(input_size == 2);
2580
10
  assert(output_size == 1);
2581
10
  ccv_cnnp_model_matmul_t* const self = (ccv_cnnp_model_matmul_t*)super;
2582
10
  ccv_nnc_tensor_param_t a_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2583
10
  ccv_nnc_tensor_param_t b_params = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
2584
10
  ccv_nnc_tensor_param_t output_params;
2585
10
  ccv_nnc_cmd_t matmul = CMD_GEMM_FORWARD(self->transpose_a, self->transpose_b);
2586
10
  matmul.info.blas.flags = self->flags;
2587
10
  ccv_nnc_hint_tensor_auto(matmul, (ccv_nnc_tensor_param_t []){
2588
10
      a_params,
2589
10
      b_params,
2590
10
    }, 2, ccv_nnc_no_hint, &output_params, 1);
2591
10
  const ccv_nnc_tensor_symbol_t matmul_output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2592
10
  ccv_nnc_graph_exec_symbol_new(graph, matmul, inputs, input_size, TENSOR_SYMBOL_LIST(matmul_output), "matmul");
2593
10
  outputs[0] = matmul_output;
2594
10
}
2595
2596
static ccv_cnnp_model_t* _ccv_cnnp_matmul_copy(const ccv_cnnp_model_t* const super, void* const context);
2597
2598
static const ccv_cnnp_model_vtab_t ccv_cnnp_matmul_isa = {
2599
  .build = _ccv_cnnp_matmul_build,
2600
  .copy = _ccv_cnnp_matmul_copy,
2601
};
2602
2603
ccv_cnnp_model_t* ccv_cnnp_matmul(const int transpose_a[2], const int transpose_b[2], const int flags, const char* const name)
2604
10
{
2605
10
  ccv_cnnp_model_matmul_t* const model_matmul = (ccv_cnnp_model_matmul_t*)cccalloc(1, sizeof(ccv_cnnp_model_matmul_t));
2606
10
  model_matmul->super.isa = &ccv_cnnp_matmul_isa;
2607
10
  model_matmul->super.input_size = 2;
2608
10
  model_matmul->super.outputs = &model_matmul->output;
2609
10
  model_matmul->super.output_size = 1;
2610
10
  model_matmul->transpose_a[0] = transpose_a[0];
2611
10
  model_matmul->transpose_a[1] = transpose_a[1];
2612
10
  model_matmul->transpose_b[0] = transpose_b[0];
2613
10
  model_matmul->transpose_b[1] = transpose_b[1];
2614
10
  model_matmul->flags = flags;
2615
10
  ccv_cnnp_model_copy_name(&model_matmul->super, name);
2616
10
  return (ccv_cnnp_model_t*)model_matmul;
2617
10
}
2618
2619
static ccv_cnnp_model_t* _ccv_cnnp_matmul_copy(const ccv_cnnp_model_t* const super, void* const context)
2620
1
{
2621
1
  const ccv_cnnp_model_matmul_t* const self = (const ccv_cnnp_model_matmul_t*)super;
2622
1
  return ccv_cnnp_matmul(self->transpose_a, self->transpose_b, self->flags, self->super.name);
2623
1
}
2624
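
The two transpose arguments follow the same convention as the transpose model earlier: a pair of equal axes leaves the operand as is, anything else swaps the named axes before the multiply, and flags is forwarded into the command's blas parameters. A sketch multiplying an assumed (2, 3) by (3, 4):

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      const int no_transpose[2] = {0, 0}; // equal entries: use the operand as is
      ccv_cnnp_model_t* const matmul = ccv_cnnp_matmul(no_transpose, no_transpose, 0, "matmul");
      const ccv_nnc_tensor_param_t inputs_params[] = { CPU_TENSOR_NHWC(32F, 2, 3), CPU_TENSOR_NHWC(32F, 3, 4) };
      ccv_cnnp_model_compile(matmul, inputs_params, 2, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, inputs_params[0], 0);
      ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, inputs_params[1], 0);
      ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 4), 0);
      ccv_cnnp_model_evaluate(matmul, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(a, b), TENSOR_LIST(c), 0, 0);
      ccv_nnc_tensor_free(a); ccv_nnc_tensor_free(b); ccv_nnc_tensor_free(c);
      ccv_cnnp_model_free(matmul);
      return 0;
    }
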
2625
// MARK - Dropout Layer
2626
2627
typedef struct {
2628
  ccv_cnnp_model_t super;
2629
  ccv_nnc_tensor_symbol_t output;
2630
  ccv_nnc_graph_exec_symbol_t dropout;
2631
  float p;
2632
  int entirety;
2633
} ccv_cnnp_model_dropout_t;
2634
2635
static void _ccv_cnnp_dropout_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2636
12
{
2637
12
  PRINT(CCV_CLI_VERBOSE, "[cnnp_dropout_build] -\n");
2638
12
  assert(input_size == 1);
2639
12
  assert(output_size == 1);
2640
12
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2641
12
  ccv_nnc_tensor_param_t output_params[2];
2642
12
  ccv_cnnp_model_dropout_t* const self = (ccv_cnnp_model_dropout_t*)super;
2643
12
  const ccv_nnc_cmd_t dropout = CMD_DROPOUT_FORWARD(self->p, self->entirety);
2644
12
  ccv_nnc_hint_tensor_auto(dropout, (ccv_nnc_tensor_param_t []){
2645
12
      params,
2646
12
    }, 1, ccv_nnc_no_hint, output_params, 2);
2647
12
  const ccv_nnc_tensor_symbol_t dropout_output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
2648
12
  const ccv_nnc_tensor_symbol_t mask = ccv_nnc_tensor_symbol_new(graph, output_params[1], "mask");
2649
12
  self->dropout = ccv_nnc_graph_exec_symbol_new(graph, dropout, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(dropout_output, mask), "dropout");
2650
12
  outputs[0] = dropout_output;
2651
12
}
2652
2653
static void _ccv_cnnp_dropout_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
2654
24
{
2655
24
  ccv_cnnp_model_dropout_t* const self = (ccv_cnnp_model_dropout_t*)super;
2656
24
  if (self->dropout.graph)
2657
24
  {
2658
24
    if (is_test)
2659
      // During test, dropout is not applied. Substituting a data transfer is safe because the copy is skipped when input and output are the same tensor.
2660
12
      updater(context, self->dropout, CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint);
2661
12
    else
2662
12
      updater(context, self->dropout, CMD_DROPOUT_FORWARD(self->p, self->entirety), ccv_nnc_no_hint);
2663
24
  }
2664
24
}
2665
2666
static ccv_cnnp_model_t* _ccv_cnnp_dropout_copy(const ccv_cnnp_model_t* const super, void* const context);
2667
2668
static const ccv_cnnp_model_vtab_t ccv_cnnp_dropout_isa = {
2669
  .build = _ccv_cnnp_dropout_build,
2670
  .set_is_test = _ccv_cnnp_dropout_set_is_test,
2671
  .copy = _ccv_cnnp_dropout_copy,
2672
};
2673
2674
ccv_cnnp_model_t* ccv_cnnp_dropout(const float p, const int entirety, const char* const name)
2675
12
{
2676
12
  ccv_cnnp_model_dropout_t* const model_dropout = (ccv_cnnp_model_dropout_t*)cccalloc(1, sizeof(ccv_cnnp_model_dropout_t));
2677
12
  model_dropout->super.isa = &ccv_cnnp_dropout_isa;
2678
12
  model_dropout->super.input_size = 1;
2679
12
  model_dropout->super.outputs = &model_dropout->output;
2680
12
  model_dropout->super.output_size = 1;
2681
12
  model_dropout->p = p;
2682
12
  model_dropout->entirety = entirety;
2683
12
  ccv_cnnp_model_copy_name(&model_dropout->super, name);
2684
12
  return (ccv_cnnp_model_t*)model_dropout;
2685
12
}
2686
2687
static ccv_cnnp_model_t* _ccv_cnnp_dropout_copy(const ccv_cnnp_model_t* const super, void* const context)
2688
0
{
2689
0
  const ccv_cnnp_model_dropout_t* const self = (const ccv_cnnp_model_dropout_t*)super;
2690
0
  return ccv_cnnp_dropout(self->p, self->entirety, self->super.name);
2691
0
}
2692
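
The set_is_test hook above is what lets one compiled graph serve both training and inference: the is_test flag on evaluate swaps the dropout command for a data transfer and back. A sketch (shape and probability assumed):

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      ccv_cnnp_model_t* const dropout = ccv_cnnp_dropout(0.5, 0, "dropout");
      const ccv_nnc_tensor_param_t params = CPU_TENSOR_NHWC(32F, 128);
      ccv_cnnp_model_compile(dropout, &params, 1, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
      ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, params, 0);
      int i;
      for (i = 0; i < 128; i++)
        a->data.f32[i] = 1;
      // Training mode: roughly half the values are zeroed, the survivors rescaled.
      ccv_cnnp_model_evaluate(dropout, (ccv_cnnp_evaluate_param_t){ .is_test = 0 }, TENSOR_LIST(a), TENSOR_LIST(b), 0, 0);
      // Test mode: set_is_test swapped in a data transfer, so b ends up equal to a.
      ccv_cnnp_model_evaluate(dropout, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(a), TENSOR_LIST(b), 0, 0);
      ccv_nnc_tensor_free(a); ccv_nnc_tensor_free(b);
      ccv_cnnp_model_free(dropout);
      return 0;
    }
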
2693
// MARK - Masked Fill Layer
2694
2695
typedef struct {
2696
  ccv_cnnp_model_t super;
2697
  ccv_nnc_tensor_symbol_t output;
2698
  float eq;
2699
  float fill;
2700
} ccv_cnnp_model_masked_fill_t;
2701
2702
static void _ccv_cnnp_masked_fill_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2703
4
{
2704
4
  PRINT(CCV_CLI_VERBOSE, "[cnnp_masked_fill_build] -\n");
2705
4
  assert(input_size == 2);
2706
4
  assert(output_size == 1);
2707
4
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2708
4
  ccv_cnnp_model_masked_fill_t* const self = (ccv_cnnp_model_masked_fill_t*)super;
2709
4
  const ccv_nnc_tensor_symbol_t masked_fill_output = ccv_nnc_tensor_symbol_new(graph, params, 0);
2710
4
  ccv_nnc_graph_exec_symbol_new(graph, CMD_MASKED_FILL_FORWARD(self->eq, self->fill), TENSOR_SYMBOL_LIST(inputs[0], inputs[1]), TENSOR_SYMBOL_LIST(masked_fill_output), "masked_fill");
2711
4
  outputs[0] = masked_fill_output;
2712
4
}
2713
2714
static ccv_cnnp_model_t* _ccv_cnnp_masked_fill_copy(const ccv_cnnp_model_t* const super, void* const context);
2715
2716
static const ccv_cnnp_model_vtab_t ccv_cnnp_masked_fill_isa = {
2717
  .build = _ccv_cnnp_masked_fill_build,
2718
  .copy = _ccv_cnnp_masked_fill_copy,
2719
};
2720
2721
ccv_cnnp_model_t* ccv_cnnp_masked_fill(const float eq, const float fill, const char* const name)
2722
4
{
2723
4
  ccv_cnnp_model_masked_fill_t* const model_masked_fill = (ccv_cnnp_model_masked_fill_t*)cccalloc(1, sizeof(ccv_cnnp_model_masked_fill_t));
2724
4
  model_masked_fill->super.isa = &ccv_cnnp_masked_fill_isa;
2725
4
  model_masked_fill->super.input_size = 2;
2726
4
  model_masked_fill->super.outputs = &model_masked_fill->output;
2727
4
  model_masked_fill->super.output_size = 1;
2728
4
  model_masked_fill->eq = eq;
2729
4
  model_masked_fill->fill = fill;
2730
4
  ccv_cnnp_model_copy_name(&model_masked_fill->super, name);
2731
4
  return (ccv_cnnp_model_t*)model_masked_fill;
2732
4
}
2733
2734
static ccv_cnnp_model_t* _ccv_cnnp_masked_fill_copy(const ccv_cnnp_model_t* const super, void* const context)
2735
0
{
2736
0
  const ccv_cnnp_model_masked_fill_t* const self = (const ccv_cnnp_model_masked_fill_t*)super;
2737
0
  return ccv_cnnp_masked_fill(self->eq, self->fill, self->super.name);
2738
0
}
2739
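
A sketch of the attention-style use of masked fill: wherever the mask equals eq (0 here), the output is set to fill (-1e9), and elsewhere the input passes through. The float mask datatype and the shapes are assumptions of this sketch:

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      ccv_cnnp_model_t* const masked_fill = ccv_cnnp_masked_fill(0, -1e9, "masked_fill");
      const ccv_nnc_tensor_param_t inputs_params[] = { CPU_TENSOR_NHWC(32F, 2, 4), CPU_TENSOR_NHWC(32F, 2, 4) };
      ccv_cnnp_model_compile(masked_fill, inputs_params, 2, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const scores = ccv_nnc_tensor_new(0, inputs_params[0], 0);
      ccv_nnc_tensor_t* const mask = ccv_nnc_tensor_new(0, inputs_params[1], 0);
      ccv_nnc_tensor_t* const out = ccv_nnc_tensor_new(0, inputs_params[0], 0);
      int i;
      for (i = 0; i < 8; i++)
        scores->data.f32[i] = i, mask->data.f32[i] = (i < 6); // last two positions masked out
      ccv_cnnp_model_evaluate(masked_fill, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(scores, mask), TENSOR_LIST(out), 0, 0);
      ccv_nnc_tensor_free(scores); ccv_nnc_tensor_free(mask); ccv_nnc_tensor_free(out);
      ccv_cnnp_model_free(masked_fill);
      return 0;
    }
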
2740
// MARK - Index Select Layer
2741
2742
typedef struct {
2743
  ccv_cnnp_model_t super;
2744
  ccv_nnc_tensor_symbol_t output;
2745
} ccv_cnnp_model_index_select_t;
2746
2747
static void _ccv_cnnp_index_select_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2748
2
{
2749
2
  PRINT(CCV_CLI_VERBOSE, "[cnnp_index_select_build] -\n");
2750
2
  assert(input_size == 2);
2751
2
  assert(output_size == 1);
2752
2
  const ccv_nnc_tensor_param_t vocab_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2753
2
  const ccv_nnc_tensor_param_t index_params = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
2754
2
  ccv_nnc_tensor_param_t output_params;
2755
2
  const ccv_nnc_cmd_t index_select = CMD_INDEX_SELECT_FORWARD();
2756
2
  ccv_nnc_hint_tensor_auto(index_select, (ccv_nnc_tensor_param_t []){
2757
2
      vocab_params,
2758
2
      index_params,
2759
2
    }, 2, ccv_nnc_no_hint, &output_params, 1);
2760
2
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2761
2
  ccv_nnc_graph_exec_symbol_new(graph, index_select, TENSOR_SYMBOL_LIST(inputs[0], inputs[1]), TENSOR_SYMBOL_LIST(output), "index_select");
2762
2
  outputs[0] = output;
2763
2
}
2764
2765
static ccv_cnnp_model_t* _ccv_cnnp_index_select_copy(const ccv_cnnp_model_t* const super, void* const context);
2766
2767
static const ccv_cnnp_model_vtab_t ccv_cnnp_index_select_isa = {
2768
  .build = _ccv_cnnp_index_select_build,
2769
  .copy = _ccv_cnnp_index_select_copy,
2770
};
2771
2772
ccv_cnnp_model_t* ccv_cnnp_index_select(const char* const name)
2773
2
{
2774
2
  ccv_cnnp_model_index_select_t* const model_index_select = (ccv_cnnp_model_index_select_t*)cccalloc(1, sizeof(ccv_cnnp_model_index_select_t));
2775
2
  model_index_select->super.isa = &ccv_cnnp_index_select_isa;
2776
2
  model_index_select->super.input_size = 2;
2777
2
  model_index_select->super.outputs = &model_index_select->output;
2778
2
  model_index_select->super.output_size = 1;
2779
2
  ccv_cnnp_model_copy_name(&model_index_select->super, name);
2780
2
  return (ccv_cnnp_model_t*)model_index_select;
2781
2
}
2782
2783
static ccv_cnnp_model_t* _ccv_cnnp_index_select_copy(const ccv_cnnp_model_t* const super, void* const context)
2784
0
{
2785
0
  ccv_cnnp_model_index_select_t* const self = (ccv_cnnp_model_index_select_t*)super;
2786
0
  return ccv_cnnp_index_select(self->super.name);
2787
0
}
2788
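
Index select takes the table as inputs[0] and the integer indices as inputs[1]; the output shape follows from the hint function. A sketch gathering 3 rows from an assumed (10, 8) table:

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      ccv_cnnp_model_t* const index_select = ccv_cnnp_index_select("index_select");
      const ccv_nnc_tensor_param_t inputs_params[] = { CPU_TENSOR_NHWC(32F, 10, 8), CPU_TENSOR_NHWC(32S, 3) };
      ccv_cnnp_model_compile(index_select, inputs_params, 2, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const table = ccv_nnc_tensor_new(0, inputs_params[0], 0);
      ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, inputs_params[1], 0);
      ccv_nnc_tensor_t* const rows = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 8), 0);
      indices->data.i32[0] = 1; indices->data.i32[1] = 0; indices->data.i32[2] = 9;
      ccv_cnnp_model_evaluate(index_select, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(table, indices), TENSOR_LIST(rows), 0, 0);
      ccv_nnc_tensor_free(table); ccv_nnc_tensor_free(indices); ccv_nnc_tensor_free(rows);
      ccv_cnnp_model_free(index_select);
      return 0;
    }
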
2789
// MARK - Embedding Layer
2790
2791
typedef struct {
2792
  ccv_cnnp_model_t super;
2793
  ccv_nnc_tensor_symbol_t output;
2794
  ccv_nnc_tensor_symbol_t vocab;
2795
  int datatype;
2796
  int vocab_size;
2797
  int embed_size;
2798
} ccv_cnnp_model_embedding_t;
2799
2800
static void _ccv_cnnp_embedding_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2801
1
{
2802
1
  ccv_cnnp_model_embedding_t* const self = (ccv_cnnp_model_embedding_t*)super;
2803
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_embedding_build] vocab_size: %d, embed_size: %d\n", self->vocab_size, self->embed_size);
2804
1
  assert(input_size == 1);
2805
1
  assert(output_size == 1);
2806
1
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2807
1
  ccv_nnc_tensor_param_t vocab_params = params;
2808
1
  memset(vocab_params.dim, 0, sizeof(vocab_params.dim));
2809
1
  vocab_params.datatype = self->datatype;
2810
1
  vocab_params.dim[0] = self->vocab_size;
2811
1
  vocab_params.dim[1] = self->embed_size;
2812
1
  if (!self->vocab.graph)
2813
1
    self->vocab = ccv_nnc_tensor_symbol_new(graph, vocab_params, "vocab");
2814
1
  assert(self->vocab.graph == graph);
2815
1
  ccv_nnc_tensor_param_t output_params;
2816
1
  const ccv_nnc_cmd_t embedding = CMD_INDEX_SELECT_FORWARD();
2817
1
  ccv_nnc_hint_tensor_auto(embedding, (ccv_nnc_tensor_param_t []){
2818
1
      vocab_params,
2819
1
      params,
2820
1
    }, 2, ccv_nnc_no_hint, &output_params, 1);
2821
1
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2822
1
  ccv_nnc_graph_exec_symbol_new(graph, embedding, TENSOR_SYMBOL_LIST(self->vocab, inputs[0]), TENSOR_SYMBOL_LIST(output), "embedding");
2823
1
  outputs[0] = output;
2824
1
}
2825
2826
static void _ccv_cnnp_embedding_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
2827
1
{
2828
1
  ccv_cnnp_model_embedding_t* const self = (ccv_cnnp_model_embedding_t*)super;
2829
1
  const float std = sqrtf(2) / sqrtf(self->vocab_size + self->embed_size);
2830
1
  const float bound = sqrtf(3) * std;
2831
1
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->vocab);
2832
1
}
2833
2834
static void _ccv_cnnp_embedding_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
2835
1
{
2836
1
  ccv_cnnp_model_embedding_t* const self = (ccv_cnnp_model_embedding_t*)super;
2837
1
  add_to_array(parameters, self->vocab, is_trainable);
2838
1
}
2839
2840
static ccv_cnnp_model_t* _ccv_cnnp_embedding_copy(const ccv_cnnp_model_t* const super, void* const context);
2841
2842
static const ccv_cnnp_model_vtab_t ccv_cnnp_embedding_isa = {
2843
  .build = _ccv_cnnp_embedding_build,
2844
  .init_states = _ccv_cnnp_embedding_init_states,
2845
  .add_to_parameter = _ccv_cnnp_embedding_add_to_parameter,
2846
  .copy = _ccv_cnnp_embedding_copy,
2847
};
2848
2849
ccv_cnnp_model_t* ccv_cnnp_embedding(const int datatype, const int vocab_size, const int embed_size, const int is_trainable, const char* const name)
2850
1
{
2851
1
  ccv_cnnp_model_embedding_t* const model_embedding = (ccv_cnnp_model_embedding_t*)cccalloc(1, sizeof(ccv_cnnp_model_embedding_t));
2852
1
  model_embedding->super.isa = &ccv_cnnp_embedding_isa;
2853
1
  model_embedding->super.input_size = 1;
2854
1
  model_embedding->super.outputs = &model_embedding->output;
2855
1
  model_embedding->super.output_size = 1;
2856
1
  model_embedding->super.is_trainable = is_trainable;
2857
1
  ccv_cnnp_model_copy_name(&model_embedding->super, name);
2858
1
  model_embedding->vocab.d = CCV_NNC_NO_TENSOR_SYMBOL;
2859
1
  model_embedding->vocab.graph = 0;
2860
1
  assert(datatype == CCV_32F || datatype == CCV_16F);
2861
1
  model_embedding->datatype = datatype;
2862
1
  assert(vocab_size > 0);
2863
1
  model_embedding->vocab_size = vocab_size;
2864
1
  assert(embed_size > 0);
2865
1
  model_embedding->embed_size = embed_size;
2866
1
  return (ccv_cnnp_model_t*)model_embedding;
2867
1
}
2868
2869
static ccv_cnnp_model_t* _ccv_cnnp_embedding_copy(const ccv_cnnp_model_t* const super, void* const context)
2870
0
{
2871
0
  ccv_cnnp_model_embedding_t* const self = (ccv_cnnp_model_embedding_t*)super;
2872
0
  return ccv_cnnp_embedding(self->datatype, self->vocab_size, self->embed_size, self->super.is_trainable, self->super.name);
2873
0
}
2874
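
The initializer above is a Glorot-style uniform draw: std = sqrt(2 / (vocab_size + embed_size)) and bound = sqrt(3) * std, so vocab starts out as Uniform(-bound, bound). A usage sketch, with the vocabulary and embedding sizes assumed:

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      ccv_cnnp_model_t* const embedding = ccv_cnnp_embedding(CCV_32F, 1000, 64, 1, "embedding");
      const ccv_nnc_tensor_param_t params = CPU_TENSOR_NHWC(32S, 3); // 3 token indices
      ccv_cnnp_model_compile(embedding, &params, 1, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const indices = ccv_nnc_tensor_new(0, params, 0);
      ccv_nnc_tensor_t* const out = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 3, 64), 0);
      indices->data.i32[0] = 1; indices->data.i32[1] = 0; indices->data.i32[2] = 999;
      // The first evaluate draws the vocab parameter from the uniform initializer.
      ccv_cnnp_model_evaluate(embedding, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(indices), TENSOR_LIST(out), 0, 0);
      ccv_nnc_tensor_free(indices); ccv_nnc_tensor_free(out);
      ccv_cnnp_model_free(embedding);
      return 0;
    }
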
2875
// MARK - Upsample Layer
2876
2877
typedef struct {
2878
  ccv_cnnp_model_t super;
2879
  ccv_nnc_tensor_symbol_t output;
2880
  int type;
2881
  float width_scale;
2882
  float height_scale;
2883
  int align_corners;
2884
} ccv_cnnp_model_upsample_t;
2885
2886
static void _ccv_cnnp_upsample_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2887
3
{
2888
3
  PRINT(CCV_CLI_VERBOSE, "[cnnp_upsample_build] -\n");
2889
3
  assert(input_size == 1);
2890
3
  assert(output_size == 1);
2891
3
  ccv_cnnp_model_upsample_t* const self = (ccv_cnnp_model_upsample_t*)super;
2892
3
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2893
3
  ccv_nnc_cmd_t cmd = CMD_UPSAMPLE_FORWARD(self->type, self->width_scale, self->height_scale, self->align_corners);
2894
3
  ccv_nnc_tensor_param_t output_params;
2895
3
  ccv_nnc_hint_tensor_auto(cmd, &params, 1, ccv_nnc_no_hint, &output_params, 1);
2896
3
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2897
3
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0]), TENSOR_SYMBOL_LIST(output), "upsample");
2898
3
  outputs[0] = output;
2899
3
}
2900
2901
static ccv_cnnp_model_t* _ccv_cnnp_upsample_copy(const ccv_cnnp_model_t* const super, void* const context);
2902
2903
static const ccv_cnnp_model_vtab_t ccv_cnnp_upsample_isa = {
2904
  .build = _ccv_cnnp_upsample_build,
2905
  .copy = _ccv_cnnp_upsample_copy,
2906
};
2907
2908
ccv_cnnp_model_t* ccv_cnnp_upsample(const int type, const float width_scale, const float height_scale, const int align_corners, const char* const name)
2909
3
{
2910
3
  ccv_cnnp_model_upsample_t* const model_upsample = (ccv_cnnp_model_upsample_t*)cccalloc(1, sizeof(ccv_cnnp_model_upsample_t));
2911
3
  model_upsample->super.isa = &ccv_cnnp_upsample_isa;
2912
3
  model_upsample->super.input_size = 1;
2913
3
  model_upsample->super.outputs = &model_upsample->output;
2914
3
  model_upsample->super.output_size = 1;
2915
3
  ccv_cnnp_model_copy_name(&model_upsample->super, name);
2916
3
  assert(type == CCV_NNC_UPSAMPLE_NEAREST || type == CCV_NNC_UPSAMPLE_BILINEAR);
2917
3
  model_upsample->type = type;
2918
3
  model_upsample->width_scale = width_scale;
2919
3
  model_upsample->height_scale = height_scale;
2920
3
  model_upsample->align_corners = align_corners;
2921
3
  return (ccv_cnnp_model_t*)model_upsample;
2922
3
}
2923
2924
static ccv_cnnp_model_t* _ccv_cnnp_upsample_copy(const ccv_cnnp_model_t* const super, void* const context)
2925
0
{
2926
0
  const ccv_cnnp_model_upsample_t* const self = (const ccv_cnnp_model_upsample_t*)super;
2927
0
  return ccv_cnnp_upsample(self->type, self->width_scale, self->height_scale, self->align_corners, self->super.name);
2928
0
}
2929
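
A sketch of 2x bilinear upsampling; the NHWC (1, 8, 8, 3) input shape is an assumption of this sketch:

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      ccv_cnnp_model_t* const upsample = ccv_cnnp_upsample(CCV_NNC_UPSAMPLE_BILINEAR, 2, 2, 0, "upsample");
      const ccv_nnc_tensor_param_t params = CPU_TENSOR_NHWC(32F, 1, 8, 8, 3);
      ccv_cnnp_model_compile(upsample, &params, 1, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
      ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 1, 16, 16, 3), 0);
      ccv_cnnp_model_evaluate(upsample, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(a), TENSOR_LIST(b), 0, 0);
      ccv_nnc_tensor_free(a); ccv_nnc_tensor_free(b);
      ccv_cnnp_model_free(upsample);
      return 0;
    }
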
2930
// MARK - Reduce Sum Layer
2931
2932
typedef struct {
2933
  ccv_cnnp_model_t super;
2934
  int axis[CCV_NNC_MAX_DIM_ALLOC];
2935
  int count;
2936
  ccv_nnc_tensor_symbol_t output;
2937
} ccv_cnnp_model_reduce_sum_t;
2938
2939
static void _ccv_cnnp_reduce_sum_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2940
1
{
2941
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_sum_build] -\n");
2942
1
  const ccv_cnnp_model_reduce_sum_t* const self = (const ccv_cnnp_model_reduce_sum_t*)super;
2943
1
  assert(input_size == 1);
2944
1
  assert(output_size == 1);
2945
1
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
2946
1
  ccv_nnc_tensor_param_t output_params;
2947
1
  ccv_nnc_cmd_t reduce_sum = CMD_REDUCE_SUM_FORWARD();
2948
1
  int i;
2949
2
  for (i = 0; i < self->count; i++)
2950
1
    reduce_sum.info.reduce.axis[i] = self->axis[i];
2951
1
  reduce_sum.info.reduce.count = self->count;
2952
1
  ccv_nnc_hint_tensor_auto(reduce_sum, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
2953
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
2954
1
  ccv_nnc_graph_exec_symbol_new(graph, reduce_sum, inputs, input_size, outputs, output_size, "reduce_sum");
2955
1
}
2956
2957
static ccv_cnnp_model_t* _ccv_cnnp_reduce_sum_copy(const ccv_cnnp_model_t* const self, void* const context);
2958
2959
static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_sum_isa = {
2960
  .build = _ccv_cnnp_reduce_sum_build,
2961
  .copy = _ccv_cnnp_reduce_sum_copy,
2962
};
2963
2964
ccv_cnnp_model_t* ccv_cnnp_reduce_sum(const int* const axis, const int axis_count, const char* const name)
2965
1
{
2966
1
  ccv_cnnp_model_reduce_sum_t* const model_reduce_sum = (ccv_cnnp_model_reduce_sum_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_sum_t));
2967
1
  model_reduce_sum->super.isa = &ccv_cnnp_reduce_sum_isa;
2968
1
  model_reduce_sum->super.input_size = 1;
2969
1
  model_reduce_sum->super.outputs = &model_reduce_sum->output;
2970
1
  model_reduce_sum->super.output_size = 1;
2971
1
  ccv_cnnp_model_copy_name(&model_reduce_sum->super, name);
2972
1
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
2973
1
  int i;
2974
2
  for (i = 0; i < axis_count; i++)
2975
1
    model_reduce_sum->axis[i] = axis[i];
2976
1
  model_reduce_sum->count = axis_count;
2977
1
  return (ccv_cnnp_model_t*)model_reduce_sum;
2978
1
}
2979
2980
static ccv_cnnp_model_t* _ccv_cnnp_reduce_sum_copy(const ccv_cnnp_model_t* const super, void* const context)
2981
0
{
2982
0
  const ccv_cnnp_model_reduce_sum_t* const self = (const ccv_cnnp_model_reduce_sum_t*)super;
2983
0
  return ccv_cnnp_reduce_sum(self->axis, self->count, self->super.name);
2984
0
}
2985
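
The four reduce sections that follow (mean, max, min, norm2) repeat this template with only the command swapped, so one sketch covers the family. It sums over axis 1, which the hint function keeps as a size-1 dimension:

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      ccv_cnnp_model_t* const reduce_sum = ccv_cnnp_reduce_sum((int []){1}, 1, "reduce_sum");
      const ccv_nnc_tensor_param_t params = CPU_TENSOR_NHWC(32F, 2, 4, 8);
      ccv_cnnp_model_compile(reduce_sum, &params, 1, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
      ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 1, 8), 0);
      ccv_cnnp_model_evaluate(reduce_sum, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(a), TENSOR_LIST(b), 0, 0);
      ccv_nnc_tensor_free(a); ccv_nnc_tensor_free(b);
      ccv_cnnp_model_free(reduce_sum);
      return 0;
    }
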
2986
// MARK - Reduce Mean Layer
2987
2988
typedef struct {
2989
  ccv_cnnp_model_t super;
2990
  int axis[CCV_NNC_MAX_DIM_ALLOC];
2991
  int count;
2992
  ccv_nnc_tensor_symbol_t output;
2993
} ccv_cnnp_model_reduce_mean_t;
2994
2995
static void _ccv_cnnp_reduce_mean_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
2996
1
{
2997
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_mean_build] -\n");
2998
1
  const ccv_cnnp_model_reduce_mean_t* const self = (const ccv_cnnp_model_reduce_mean_t*)super;
2999
1
  assert(input_size == 1);
3000
1
  assert(output_size == 1);
3001
1
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3002
1
  ccv_nnc_tensor_param_t output_params;
3003
1
  ccv_nnc_cmd_t reduce_mean = CMD_REDUCE_MEAN_FORWARD();
3004
1
  int i;
3005
2
  for (i = 0; i < self->count; i++)
3006
1
    reduce_mean.info.reduce.axis[i] = self->axis[i];
3007
1
  reduce_mean.info.reduce.count = self->count;
3008
1
  ccv_nnc_hint_tensor_auto(reduce_mean, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
3009
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
3010
1
  ccv_nnc_graph_exec_symbol_new(graph, reduce_mean, inputs, input_size, outputs, output_size, "reduce_mean");
3011
1
}
3012
3013
static ccv_cnnp_model_t* _ccv_cnnp_reduce_mean_copy(const ccv_cnnp_model_t* const self, void* const context);
3014
3015
static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_mean_isa = {
3016
  .build = _ccv_cnnp_reduce_mean_build,
3017
  .copy = _ccv_cnnp_reduce_mean_copy,
3018
};
3019
3020
ccv_cnnp_model_t* ccv_cnnp_reduce_mean(const int* const axis, const int axis_count, const char* const name)
3021
1
{
3022
1
  ccv_cnnp_model_reduce_mean_t* const model_reduce_mean = (ccv_cnnp_model_reduce_mean_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_mean_t));
3023
1
  model_reduce_mean->super.isa = &ccv_cnnp_reduce_mean_isa;
3024
1
  model_reduce_mean->super.input_size = 1;
3025
1
  model_reduce_mean->super.outputs = &model_reduce_mean->output;
3026
1
  model_reduce_mean->super.output_size = 1;
3027
1
  ccv_cnnp_model_copy_name(&model_reduce_mean->super, name);
3028
1
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
3029
1
  int i;
3030
2
  for (i = 0; i < axis_count; i++)
3031
1
    model_reduce_mean->axis[i] = axis[i];
3032
1
  model_reduce_mean->count = axis_count;
3033
1
  return (ccv_cnnp_model_t*)model_reduce_mean;
3034
1
}
3035
3036
static ccv_cnnp_model_t* _ccv_cnnp_reduce_mean_copy(const ccv_cnnp_model_t* const super, void* const context)
3037
0
{
3038
0
  const ccv_cnnp_model_reduce_mean_t* const self = (const ccv_cnnp_model_reduce_mean_t*)super;
3039
0
  return ccv_cnnp_reduce_mean(self->axis, self->count, self->super.name);
3040
0
}
3041
3042
// MARK - Reduce Max Layer
3043
3044
typedef struct {
3045
  ccv_cnnp_model_t super;
3046
  int axis[CCV_NNC_MAX_DIM_ALLOC];
3047
  int count;
3048
  ccv_nnc_tensor_symbol_t output;
3049
} ccv_cnnp_model_reduce_max_t;
3050
3051
static void _ccv_cnnp_reduce_max_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3052
1
{
3053
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_max_build] -\n");
3054
1
  const ccv_cnnp_model_reduce_max_t* const self = (const ccv_cnnp_model_reduce_max_t*)super;
3055
1
  assert(input_size == 1);
3056
1
  assert(output_size == 1);
3057
1
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3058
1
  ccv_nnc_tensor_param_t output_params;
3059
1
  ccv_nnc_cmd_t reduce_max = CMD_REDUCE_MAX_FORWARD();
3060
1
  int i;
3061
2
  for (i = 0; i < self->count; i++)
3062
1
    reduce_max.info.reduce.axis[i] = self->axis[i];
3063
1
  reduce_max.info.reduce.count = self->count;
3064
1
  ccv_nnc_hint_tensor_auto(reduce_max, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
3065
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
3066
1
  ccv_nnc_graph_exec_symbol_new(graph, reduce_max, inputs, input_size, outputs, output_size, "reduce_max");
3067
1
}
3068
3069
static ccv_cnnp_model_t* _ccv_cnnp_reduce_max_copy(const ccv_cnnp_model_t* const self, void* const context);
3070
3071
static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_max_isa = {
3072
  .build = _ccv_cnnp_reduce_max_build,
3073
  .copy = _ccv_cnnp_reduce_max_copy,
3074
};
3075
3076
ccv_cnnp_model_t* ccv_cnnp_reduce_max(const int* const axis, const int axis_count, const char* const name)
3077
1
{
3078
1
  ccv_cnnp_model_reduce_max_t* const model_reduce_max = (ccv_cnnp_model_reduce_max_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_max_t));
3079
1
  model_reduce_max->super.isa = &ccv_cnnp_reduce_max_isa;
3080
1
  model_reduce_max->super.input_size = 1;
3081
1
  model_reduce_max->super.outputs = &model_reduce_max->output;
3082
1
  model_reduce_max->super.output_size = 1;
3083
1
  ccv_cnnp_model_copy_name(&model_reduce_max->super, name);
3084
1
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
3085
1
  int i;
3086
2
  for (i = 0; i < axis_count; i++)
3087
1
    model_reduce_max->axis[i] = axis[i];
3088
1
  model_reduce_max->count = axis_count;
3089
1
  return (ccv_cnnp_model_t*)model_reduce_max;
3090
1
}
3091
3092
static ccv_cnnp_model_t* _ccv_cnnp_reduce_max_copy(const ccv_cnnp_model_t* const super, void* const context)
3093
0
{
3094
0
  const ccv_cnnp_model_reduce_max_t* const self = (const ccv_cnnp_model_reduce_max_t*)super;
3095
0
  return ccv_cnnp_reduce_max(self->axis, self->count, self->super.name);
3096
0
}
3097
3098
// MARK - Reduce Min Layer
3099
3100
typedef struct {
3101
  ccv_cnnp_model_t super;
3102
  int axis[CCV_NNC_MAX_DIM_ALLOC];
3103
  int count;
3104
  ccv_nnc_tensor_symbol_t output;
3105
} ccv_cnnp_model_reduce_min_t;
3106
3107
static void _ccv_cnnp_reduce_min_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3108
1
{
3109
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_min_build] -\n");
3110
1
  const ccv_cnnp_model_reduce_min_t* const self = (const ccv_cnnp_model_reduce_min_t*)super;
3111
1
  assert(input_size == 1);
3112
1
  assert(output_size == 1);
3113
1
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3114
1
  ccv_nnc_tensor_param_t output_params;
3115
1
  ccv_nnc_cmd_t reduce_min = CMD_REDUCE_MIN_FORWARD();
3116
1
  int i;
3117
2
  for (i = 0; i < self->count; i++)
3118
1
    reduce_min.info.reduce.axis[i] = self->axis[i];
3119
1
  reduce_min.info.reduce.count = self->count;
3120
1
  ccv_nnc_hint_tensor_auto(reduce_min, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
3121
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
3122
1
  ccv_nnc_graph_exec_symbol_new(graph, reduce_min, inputs, input_size, outputs, output_size, "reduce_min");
3123
1
}
3124
3125
static ccv_cnnp_model_t* _ccv_cnnp_reduce_min_copy(const ccv_cnnp_model_t* const self, void* const context);
3126
3127
static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_min_isa = {
3128
  .build = _ccv_cnnp_reduce_min_build,
3129
  .copy = _ccv_cnnp_reduce_min_copy,
3130
};
3131
3132
ccv_cnnp_model_t* ccv_cnnp_reduce_min(const int* const axis, const int axis_count, const char* const name)
3133
1
{
3134
1
  ccv_cnnp_model_reduce_min_t* const model_reduce_min = (ccv_cnnp_model_reduce_min_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_min_t));
3135
1
  model_reduce_min->super.isa = &ccv_cnnp_reduce_min_isa;
3136
1
  model_reduce_min->super.input_size = 1;
3137
1
  model_reduce_min->super.outputs = &model_reduce_min->output;
3138
1
  model_reduce_min->super.output_size = 1;
3139
1
  ccv_cnnp_model_copy_name(&model_reduce_min->super, name);
3140
1
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
3141
1
  int i;
3142
2
  for (i = 0; i < axis_count; i++)
3143
1
    model_reduce_min->axis[i] = axis[i];
3144
1
  model_reduce_min->count = axis_count;
3145
1
  return (ccv_cnnp_model_t*)model_reduce_min;
3146
1
}
3147
3148
static ccv_cnnp_model_t* _ccv_cnnp_reduce_min_copy(const ccv_cnnp_model_t* const super, void* const context)
3149
0
{
3150
0
  const ccv_cnnp_model_reduce_min_t* const self = (const ccv_cnnp_model_reduce_min_t*)super;
3151
0
  return ccv_cnnp_reduce_min(self->axis, self->count, self->super.name);
3152
0
}
3153
3154
// MARK - Reduce Norm2 Layer
3155
3156
typedef struct {
3157
  ccv_cnnp_model_t super;
3158
  int axis[CCV_NNC_MAX_DIM_ALLOC];
3159
  int count;
3160
  ccv_nnc_tensor_symbol_t output;
3161
} ccv_cnnp_model_reduce_norm2_t;
3162
3163
static void _ccv_cnnp_reduce_norm2_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3164
1
{
3165
1
  const ccv_cnnp_model_reduce_norm2_t* const self = (const ccv_cnnp_model_reduce_norm2_t*)super;
3166
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_reduce_norm2_build] -\n");
3167
1
  assert(input_size == 1);
3168
1
  assert(output_size == 1);
3169
1
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3170
1
  ccv_nnc_tensor_param_t output_params;
3171
1
  ccv_nnc_cmd_t reduce_norm2 = CMD_REDUCE_NORM2_FORWARD();
3172
1
  int i;
3173
2
  for (i = 0; i < self->count; i++)
3174
1
    reduce_norm2.info.reduce.axis[i] = self->axis[i];
3175
1
  reduce_norm2.info.reduce.count = self->count;
3176
1
  ccv_nnc_hint_tensor_auto(reduce_norm2, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
3177
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
3178
1
  ccv_nnc_graph_exec_symbol_new(graph, reduce_norm2, inputs, input_size, outputs, output_size, "reduce_norm2");
3179
1
}
3180
3181
static ccv_cnnp_model_t* _ccv_cnnp_reduce_norm2_copy(const ccv_cnnp_model_t* const self, void* const context);
3182
3183
static const ccv_cnnp_model_vtab_t ccv_cnnp_reduce_norm2_isa = {
3184
  .build = _ccv_cnnp_reduce_norm2_build,
3185
  .copy = _ccv_cnnp_reduce_norm2_copy,
3186
};
3187
3188
ccv_cnnp_model_t* ccv_cnnp_reduce_norm2(const int* const axis, const int axis_count, const char* const name)
3189
1
{
3190
1
  ccv_cnnp_model_reduce_norm2_t* const model_reduce_norm2 = (ccv_cnnp_model_reduce_norm2_t*)cccalloc(1, sizeof(ccv_cnnp_model_reduce_norm2_t));
3191
1
  model_reduce_norm2->super.isa = &ccv_cnnp_reduce_norm2_isa;
3192
1
  model_reduce_norm2->super.input_size = 1;
3193
1
  model_reduce_norm2->super.outputs = &model_reduce_norm2->output;
3194
1
  model_reduce_norm2->super.output_size = 1;
3195
1
  ccv_cnnp_model_copy_name(&model_reduce_norm2->super, name);
3196
1
  assert(axis_count <= CCV_NNC_MAX_DIM_ALLOC);
3197
1
  int i;
3198
2
  for (i = 0; i < axis_count; i++)
3199
1
    model_reduce_norm2->axis[i] = axis[i];
3200
1
  model_reduce_norm2->count = axis_count;
3201
1
  return (ccv_cnnp_model_t*)model_reduce_norm2;
3202
1
}
3203
3204
static ccv_cnnp_model_t* _ccv_cnnp_reduce_norm2_copy(const ccv_cnnp_model_t* const super, void* const context)
3205
0
{
3206
0
  const ccv_cnnp_model_reduce_norm2_t* const self = (const ccv_cnnp_model_reduce_norm2_t*)super;
3207
0
  return ccv_cnnp_reduce_norm2(self->axis, self->count, self->super.name);
3208
0
}
3209
3210
// MARK - Argmax Layer
3211
3212
typedef struct {
3213
  ccv_cnnp_model_t super;
3214
  int axis;
3215
  ccv_nnc_tensor_symbol_t output;
3216
} ccv_cnnp_model_argmax_t;
3217
3218
static void _ccv_cnnp_argmax_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3219
1
{
3220
1
  const ccv_cnnp_model_argmax_t* const self = (const ccv_cnnp_model_argmax_t*)super;
3221
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_argmax_build] -\n");
3222
1
  assert(input_size == 1);
3223
1
  assert(output_size == 1);
3224
1
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3225
1
  ccv_nnc_tensor_param_t output_params;
3226
1
  ccv_nnc_cmd_t argmax = CMD_ARGMAX_FORWARD();
3227
1
  argmax.info.reduce.axis[0] = self->axis;
3228
1
  argmax.info.reduce.count = 1;
3229
1
  ccv_nnc_hint_tensor_auto(argmax, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
3230
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
3231
1
  ccv_nnc_graph_exec_symbol_new(graph, argmax, inputs, input_size, outputs, output_size, "argmax");
3232
1
}
3233
3234
static ccv_cnnp_model_t* _ccv_cnnp_argmax_copy(const ccv_cnnp_model_t* const self, void* const context);
3235
3236
static const ccv_cnnp_model_vtab_t ccv_cnnp_argmax_isa = {
3237
  .build = _ccv_cnnp_argmax_build,
3238
  .copy = _ccv_cnnp_argmax_copy,
3239
};
3240
3241
ccv_cnnp_model_t* ccv_cnnp_argmax(const int axis, const char* const name)
3242
1
{
3243
1
  ccv_cnnp_model_argmax_t* const model_argmax = (ccv_cnnp_model_argmax_t*)cccalloc(1, sizeof(ccv_cnnp_model_argmax_t));
3244
1
  model_argmax->super.isa = &ccv_cnnp_argmax_isa;
3245
1
  model_argmax->super.input_size = 1;
3246
1
  model_argmax->super.outputs = &model_argmax->output;
3247
1
  model_argmax->super.output_size = 1;
3248
1
  ccv_cnnp_model_copy_name(&model_argmax->super, name);
3249
1
  model_argmax->axis = axis;
3250
1
  return (ccv_cnnp_model_t*)model_argmax;
3251
1
}
3252
3253
static ccv_cnnp_model_t* _ccv_cnnp_argmax_copy(const ccv_cnnp_model_t* const super, void* const context)
3254
0
{
3255
0
  const ccv_cnnp_model_argmax_t* const self = (const ccv_cnnp_model_argmax_t*)super;
3256
0
  return ccv_cnnp_argmax(self->axis, self->super.name);
3257
0
}
3258
3259
// MARK - Argmin Layer
3260
3261
typedef struct {
3262
  ccv_cnnp_model_t super;
3263
  int axis;
3264
  ccv_nnc_tensor_symbol_t output;
3265
} ccv_cnnp_model_argmin_t;
3266
3267
static void _ccv_cnnp_argmin_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3268
1
{
3269
1
  const ccv_cnnp_model_argmin_t* const self = (const ccv_cnnp_model_argmin_t*)super;
3270
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_argmin_build] -\n");
3271
1
  assert(input_size == 1);
3272
1
  assert(output_size == 1);
3273
1
  ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3274
1
  ccv_nnc_tensor_param_t output_params;
3275
1
  ccv_nnc_cmd_t argmin = CMD_ARGMIN_FORWARD();
3276
1
  argmin.info.reduce.axis[0] = self->axis;
3277
1
  argmin.info.reduce.count = 1;
3278
1
  ccv_nnc_hint_tensor_auto(argmin, &input_params, 1, ccv_nnc_no_hint, &output_params, 1);
3279
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
3280
1
  ccv_nnc_graph_exec_symbol_new(graph, argmin, inputs, input_size, outputs, output_size, "argmin");
3281
1
}
3282
3283
static ccv_cnnp_model_t* _ccv_cnnp_argmin_copy(const ccv_cnnp_model_t* const self, void* const context);
3284
3285
static const ccv_cnnp_model_vtab_t ccv_cnnp_argmin_isa = {
3286
  .build = _ccv_cnnp_argmin_build,
3287
  .copy = _ccv_cnnp_argmin_copy,
3288
};
3289
3290
ccv_cnnp_model_t* ccv_cnnp_argmin(const int axis, const char* const name)
3291
1
{
3292
1
  ccv_cnnp_model_argmin_t* const model_argmin = (ccv_cnnp_model_argmin_t*)cccalloc(1, sizeof(ccv_cnnp_model_argmin_t));
3293
1
  model_argmin->super.isa = &ccv_cnnp_argmin_isa;
3294
1
  model_argmin->super.input_size = 1;
3295
1
  model_argmin->super.outputs = &model_argmin->output;
3296
1
  model_argmin->super.output_size = 1;
3297
1
  ccv_cnnp_model_copy_name(&model_argmin->super, name);
3298
1
  model_argmin->axis = axis;
3299
1
  return (ccv_cnnp_model_t*)model_argmin;
3300
1
}
3301
3302
static ccv_cnnp_model_t* _ccv_cnnp_argmin_copy(const ccv_cnnp_model_t* const super, void* const context)
3303
0
{
3304
0
  const ccv_cnnp_model_argmin_t* const self = (const ccv_cnnp_model_argmin_t*)super;
3305
0
  return ccv_cnnp_argmin(self->axis, self->super.name);
3306
0
}
3307
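
Argmax and argmin share this template with a single reduce axis. In this sketch the (2, 4, 8) shape is assumed, and the 32S output datatype is my reading of what CMD_ARGMAX_FORWARD's hint produces rather than something this file states:

    #include <ccv.h>
    #include <nnc/ccv_nnc.h>
    #include <nnc/ccv_nnc_easy.h>

    int main(void)
    {
      ccv_nnc_init();
      ccv_cnnp_model_t* const argmax = ccv_cnnp_argmax(1, "argmax");
      const ccv_nnc_tensor_param_t params = CPU_TENSOR_NHWC(32F, 2, 4, 8);
      ccv_cnnp_model_compile(argmax, &params, 1, CMD_NOOP(), CMD_NOOP());
      ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
      ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, 2, 1, 8), 0);
      ccv_cnnp_model_evaluate(argmax, (ccv_cnnp_evaluate_param_t){ .is_test = 1 }, TENSOR_LIST(a), TENSOR_LIST(b), 0, 0);
      ccv_nnc_tensor_free(a); ccv_nnc_tensor_free(b);
      ccv_cnnp_model_free(argmax);
      return 0;
    }
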
3308
// MARK - Min Layer

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_min_t;

static void _ccv_cnnp_min_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_min_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params[2];
  int i;
  for (i = 0; i < 2; i++)
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
  ccv_nnc_tensor_param_t output_params;
  const ccv_nnc_cmd_t min = CMD_MIN_FORWARD();
  ccv_nnc_hint_tensor_auto(min, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, min, inputs, input_size, outputs, output_size, "min");
}

static ccv_cnnp_model_t* _ccv_cnnp_min_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_min_isa = {
  .build = _ccv_cnnp_min_build,
  .copy = _ccv_cnnp_min_copy,
};

ccv_cnnp_model_t* ccv_cnnp_min(const char* const name)
{
  ccv_cnnp_model_min_t* const model_min = (ccv_cnnp_model_min_t*)cccalloc(1, sizeof(ccv_cnnp_model_min_t));
  model_min->super.isa = &ccv_cnnp_min_isa;
  model_min->super.input_size = 2;
  model_min->super.outputs = &model_min->output;
  model_min->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_min->super, name);
  return (ccv_cnnp_model_t*)model_min;
}

static ccv_cnnp_model_t* _ccv_cnnp_min_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_min_t* const self = (const ccv_cnnp_model_min_t*)super;
  return ccv_cnnp_min(self->super.name);
}

// MARK - Max Layer

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_max_t;

static void _ccv_cnnp_max_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_max_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  ccv_nnc_tensor_param_t input_params[2];
  int i;
  for (i = 0; i < 2; i++)
    input_params[i] = ccv_nnc_tensor_symbol_params(graph, inputs[i]);
  ccv_nnc_tensor_param_t output_params;
  const ccv_nnc_cmd_t max = CMD_MAX_FORWARD();
  ccv_nnc_hint_tensor_auto(max, input_params, 2, ccv_nnc_no_hint, &output_params, 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, max, inputs, input_size, outputs, output_size, "max");
}

static ccv_cnnp_model_t* _ccv_cnnp_max_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_max_isa = {
  .build = _ccv_cnnp_max_build,
  .copy = _ccv_cnnp_max_copy,
};

ccv_cnnp_model_t* ccv_cnnp_max(const char* const name)
{
  ccv_cnnp_model_max_t* const model_max = (ccv_cnnp_model_max_t*)cccalloc(1, sizeof(ccv_cnnp_model_max_t));
  model_max->super.isa = &ccv_cnnp_max_isa;
  model_max->super.input_size = 2;
  model_max->super.outputs = &model_max->output;
  model_max->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_max->super, name);
  return (ccv_cnnp_model_t*)model_max;
}

static ccv_cnnp_model_t* _ccv_cnnp_max_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_max_t* const self = (const ccv_cnnp_model_max_t*)super;
  return ccv_cnnp_max(self->super.name);
}

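/* Illustrative sketch (not from the instrumented file): min and max are
 * binary element-wise layers, so each is applied to two model I/Os of
 * matching shape. Helper names follow the usual cnnp conventions and are
 * assumptions of this example. */
static ccv_cnnp_model_t* _example_min_max_model(void)
{
  const ccv_cnnp_model_io_t a = ccv_cnnp_input();
  const ccv_cnnp_model_io_t b = ccv_cnnp_input();
  const ccv_cnnp_model_io_t lo = ccv_cnnp_model_apply(ccv_cnnp_min("min"), MODEL_IO_LIST(a, b));
  const ccv_cnnp_model_io_t hi = ccv_cnnp_model_apply(ccv_cnnp_max("max"), MODEL_IO_LIST(a, b));
  return ccv_cnnp_model_new(MODEL_IO_LIST(a, b), MODEL_IO_LIST(lo, hi), 0, "min_max");
}
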
// MARK - LSTM Layer

typedef struct {
  ccv_cnnp_model_t super;
  int masked;
  ccv_nnc_tensor_symbol_t output;
  ccv_nnc_tensor_symbol_t weights;
  ccv_nnc_tensor_symbol_t reserves;
  ccv_nnc_cmd_param_t params;
  ccv_nnc_graph_exec_symbol_t lstm;
} ccv_cnnp_model_lstm_t;

static int _ccv_cnnp_lstm_weight_dim(int bidirectional, int num_layers, int input_size, int hidden_size, int proj_size, int bias)
{
  const int D = !!bidirectional + 1;
  if (hidden_size == proj_size)
    return (num_layers * (bias ? 8 : 0) + (num_layers - 1) * (hidden_size * 4 * D + hidden_size * 4) + input_size * 4 + hidden_size * 4) * D;
  else
    return (num_layers * (bias ? 8 : 0) + (num_layers - 1) * (proj_size * 4 * D + proj_size * 4) + (proj_size * 4 + input_size * 4) + num_layers * proj_size) * D;
}

static void _ccv_cnnp_lstm_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_lstm_t* const self = (ccv_cnnp_model_lstm_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_lstm_build] -\n");
  assert(input_size == self->super.input_size);
  assert(output_size == 1);
  const int proj_size = self->params.rnn.proj_size == 0 ? self->params.rnn.hidden_size : self->params.rnn.proj_size;
  ccv_nnc_tensor_param_t input_params[5];
  input_params[0] = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  if (input_size == 2)
    input_params[1] = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  input_params[4] = input_params[0];
  memset(input_params[4].dim, 0, sizeof(input_params[4].dim));
  const int x_nd = ccv_nnc_tensor_nd(input_params[0].dim);
  const int feature_count = input_params[0].dim[x_nd - 1];
  input_params[4].dim[0] = _ccv_cnnp_lstm_weight_dim(self->params.rnn.bidirectional, self->params.rnn.num_layers, feature_count, self->params.rnn.hidden_size, proj_size, self->params.rnn.bias);
  input_params[4].dim[1] = self->params.rnn.hidden_size;
  const ccv_nnc_cmd_t lstm = ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, self->params, 0);
  ccv_nnc_tensor_param_t output_params[4];
  ccv_nnc_hint_tensor_auto(lstm, input_params, 5, ccv_nnc_no_hint, output_params, 4);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
  if (!self->weights.graph)
    self->weights = ccv_nnc_tensor_symbol_new(graph, input_params[4], "weights");
  if (!self->reserves.graph)
    self->reserves = ccv_nnc_tensor_symbol_new(graph, output_params[3], "reserves");
  const ccv_nnc_tensor_symbol_t mask = input_size == 2 ? inputs[1] : NO_TENSOR_SYMBOL;
  self->lstm = ccv_nnc_graph_exec_symbol_new(graph, lstm, TENSOR_SYMBOL_LIST(inputs[0], mask, NO_TENSOR_SYMBOL, NO_TENSOR_SYMBOL, self->weights), TENSOR_SYMBOL_LIST(outputs[0], NO_TENSOR_SYMBOL, NO_TENSOR_SYMBOL, self->reserves), "lstm");
}

static void _ccv_cnnp_lstm_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_lstm_t* const self = (ccv_cnnp_model_lstm_t*)super;
  if (self->weights.graph)
  {
    const float stdv = 1.0 / sqrt(self->params.rnn.hidden_size);
    initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-stdv, stdv), ccv_nnc_no_hint, 0, 0, self->weights);
  }
}

static void _ccv_cnnp_lstm_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_lstm_t* const self = (ccv_cnnp_model_lstm_t*)super;
  if (self->weights.graph)
    add_to_array(parameters, self->weights, is_trainable);
}

static void _ccv_cnnp_lstm_set_is_test(ccv_cnnp_model_t* const super, const int is_test, const ccv_cnnp_cmd_updater_f updater, void* const context)
{
  ccv_cnnp_model_lstm_t* const self = (ccv_cnnp_model_lstm_t*)super;
  if (self->lstm.graph)
  {
    self->params.rnn.is_test = is_test;
    updater(context, self->lstm, ccv_nnc_cmd(CCV_NNC_LSTM_FORWARD, 0, self->params, 0), ccv_nnc_no_hint);
  }
}

static ccv_cnnp_model_t* _ccv_cnnp_lstm_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_lstm_isa = {
  .build = _ccv_cnnp_lstm_build,
  .init_states = _ccv_cnnp_lstm_init_states,
  .add_to_parameter = _ccv_cnnp_lstm_add_to_parameter,
  .copy = _ccv_cnnp_lstm_copy,
  .set_is_test = _ccv_cnnp_lstm_set_is_test,
};

ccv_cnnp_model_t* ccv_cnnp_lstm(const int masked, const int hidden_size, const int proj_size, const int num_layers, const int bias, const int batch_first, const int bidirectional, const float dropout, const int is_trainable, const char* const name)
{
  ccv_cnnp_model_lstm_t* const model_lstm = (ccv_cnnp_model_lstm_t*)cccalloc(1, sizeof(ccv_cnnp_model_lstm_t));
  model_lstm->super.isa = &ccv_cnnp_lstm_isa;
  model_lstm->super.input_size = masked ? 2 : 1;
  model_lstm->super.outputs = &model_lstm->output;
  model_lstm->super.output_size = 1;
  model_lstm->super.is_trainable = is_trainable;
  ccv_cnnp_model_copy_name(&model_lstm->super, name);
  model_lstm->masked = masked;
  model_lstm->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
  model_lstm->weights.graph = 0;
  model_lstm->params.rnn.hidden_size = hidden_size;
  model_lstm->params.rnn.proj_size = proj_size;
  model_lstm->params.rnn.num_layers = num_layers;
  model_lstm->params.rnn.bias = bias;
  model_lstm->params.rnn.batch_first = batch_first;
  model_lstm->params.rnn.bidirectional = bidirectional;
  model_lstm->params.rnn.dropout = dropout;
  return (ccv_cnnp_model_t*)model_lstm;
}

static ccv_cnnp_model_t* _ccv_cnnp_lstm_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_lstm_t* const self = (const ccv_cnnp_model_lstm_t*)super;
  return ccv_cnnp_lstm(self->masked, self->params.rnn.hidden_size, self->params.rnn.proj_size, self->params.rnn.num_layers, self->params.rnn.bias, self->params.rnn.batch_first, self->params.rnn.bidirectional, self->params.rnn.dropout, self->super.is_trainable, self->super.name);
}

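/* Illustrative sketch: constructing the LSTM defined above. The sizes are
 * assumptions for the example; proj_size 0 means no projection (the build
 * function falls back to hidden_size), and masked 0 keeps the model
 * single-input. */
static ccv_cnnp_model_t* _example_lstm(void)
{
  return ccv_cnnp_lstm(0 /* masked */, 256 /* hidden_size */, 0 /* proj_size */,
    2 /* num_layers */, 1 /* bias */, 1 /* batch_first */, 0 /* bidirectional */,
    0.1 /* dropout */, 1 /* is_trainable */, "lstm");
}
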
/// MARK - Datatype conversion layer.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  int datatype;
  int ref_to_last;
} ccv_cnnp_model_datatype_conversion_t;

static void _ccv_cnnp_datatype_conversion_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_datatype_conversion_t* const self = (ccv_cnnp_model_datatype_conversion_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_datatype_conversion_build] -\n");
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  if (self->ref_to_last)
  {
    assert(input_size > 1);
    const ccv_nnc_tensor_param_t last_params = ccv_nnc_tensor_symbol_params(graph, inputs[input_size - 1]);
    params.datatype = last_params.datatype;
  } else
    params.datatype = self->datatype;
  assert(output_size == 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_DATATYPE_CONVERSION_FORWARD(), inputs, output_size /* intentional */, outputs, output_size, 0);
}

static ccv_cnnp_model_t* _ccv_cnnp_datatype_conversion_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_datatype_conversion_isa = {
  .build = _ccv_cnnp_datatype_conversion_build,
  .copy = _ccv_cnnp_datatype_conversion_copy,
};

ccv_cnnp_model_t* ccv_cnnp_datatype_conversion(const int datatype, const int ref_to_last, const char* const name)
{
  ccv_cnnp_model_datatype_conversion_t* const model_datatype_conversion = (ccv_cnnp_model_datatype_conversion_t*)cccalloc(1, sizeof(ccv_cnnp_model_datatype_conversion_t));
  model_datatype_conversion->super.isa = &ccv_cnnp_datatype_conversion_isa;
  model_datatype_conversion->super.input_size = 0;
  model_datatype_conversion->super.outputs = &model_datatype_conversion->output;
  model_datatype_conversion->super.output_size = 1;
  model_datatype_conversion->datatype = datatype;
  model_datatype_conversion->ref_to_last = ref_to_last;
  ccv_cnnp_model_copy_name(&model_datatype_conversion->super, name);
  return (ccv_cnnp_model_t*)model_datatype_conversion;
}

static ccv_cnnp_model_t* _ccv_cnnp_datatype_conversion_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  ccv_cnnp_model_datatype_conversion_t* const self = (ccv_cnnp_model_datatype_conversion_t*)super;
  return ccv_cnnp_datatype_conversion(self->datatype, self->ref_to_last, self->super.name);
}

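/* Illustrative sketch: a fixed conversion to half precision. CCV_16F is the
 * half-precision datatype constant; with ref_to_last set instead, the build
 * function above ignores the datatype argument and mirrors the datatype of
 * the last input. */
static ccv_cnnp_model_t* _example_to_fp16(void)
{
  return ccv_cnnp_datatype_conversion(CCV_16F, 0, "to_fp16");
}
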
/// MARK - Clamp layer.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  float min;
  float max;
} ccv_cnnp_model_clamp_t;

static void _ccv_cnnp_clamp_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_clamp_t* const self = (ccv_cnnp_model_clamp_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_clamp_build] -\n");
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  assert(output_size == 1);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_CLAMP_FORWARD(self->min, self->max), inputs, output_size /* intentional */, outputs, output_size, 0);
}

static ccv_cnnp_model_t* _ccv_cnnp_clamp_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_clamp_isa = {
  .build = _ccv_cnnp_clamp_build,
  .copy = _ccv_cnnp_clamp_copy,
};

ccv_cnnp_model_t* ccv_cnnp_clamp(const float min, const float max, const char* const name)
{
  ccv_cnnp_model_clamp_t* const model_clamp = (ccv_cnnp_model_clamp_t*)cccalloc(1, sizeof(ccv_cnnp_model_clamp_t));
  model_clamp->super.isa = &ccv_cnnp_clamp_isa;
  model_clamp->super.input_size = 0;
  model_clamp->super.outputs = &model_clamp->output;
  model_clamp->super.output_size = 1;
  model_clamp->min = min;
  model_clamp->max = max;
  ccv_cnnp_model_copy_name(&model_clamp->super, name);
  return (ccv_cnnp_model_t*)model_clamp;
}

static ccv_cnnp_model_t* _ccv_cnnp_clamp_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  ccv_cnnp_model_clamp_t* const self = (ccv_cnnp_model_clamp_t*)super;
  return ccv_cnnp_clamp(self->min, self->max, self->super.name);
}

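/* Illustrative sketch: clamping activations into [0, 1]. Passing NAN for one
 * bound to leave that side open follows the convention CMD_CLAMP_FORWARD is
 * commonly used with elsewhere in ccv; treat that as an assumption here. */
static ccv_cnnp_model_t* _example_clamp01(void)
{
  return ccv_cnnp_clamp(0, 1, "clamp01");
}
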
// MARK - Parameter Layer

typedef struct {
  ccv_cnnp_model_t super;
  float init_bound;
  ccv_nnc_tensor_symbol_t weights;
  ccv_nnc_tensor_param_t weights_params;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_parameter_t;

static void _ccv_cnnp_parameter_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_parameter_build] -\n");
  assert(output_size == 1);
  ccv_cnnp_model_parameter_t* const self = (ccv_cnnp_model_parameter_t*)super;
  if (!self->weights.graph)
    self->weights = ccv_nnc_tensor_symbol_new(graph, self->weights_params, "weights");
  assert(self->weights.graph == graph);
  outputs[0] = self->weights;
}

static void _ccv_cnnp_parameter_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_parameter_t* const self = (ccv_cnnp_model_parameter_t*)super;
  if (self->init_bound > 0)
    initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-self->init_bound, self->init_bound), ccv_nnc_no_hint, 0, 0, self->weights);
  else
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->weights);
}

static void _ccv_cnnp_parameter_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_parameter_t* const self = (ccv_cnnp_model_parameter_t*)super;
  add_to_array(parameters, self->weights, is_trainable);
}

static ccv_cnnp_model_t* _ccv_cnnp_parameter_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_parameter_isa = {
  .build = _ccv_cnnp_parameter_build,
  .init_states = _ccv_cnnp_parameter_init_states,
  .add_to_parameter = _ccv_cnnp_parameter_add_to_parameter,
  .copy = _ccv_cnnp_parameter_copy,
};

ccv_cnnp_model_t* ccv_cnnp_parameter(const ccv_nnc_tensor_param_t params, const float init_bound, const int is_trainable, const char* const name)
{
  ccv_cnnp_model_parameter_t* const model_parameter = (ccv_cnnp_model_parameter_t*)cccalloc(1, sizeof(ccv_cnnp_model_parameter_t));
  model_parameter->super.isa = &ccv_cnnp_parameter_isa;
  model_parameter->super.input_size = 0;
  model_parameter->super.outputs = &model_parameter->output;
  model_parameter->super.output_size = 1;
  model_parameter->super.is_trainable = is_trainable;
  ccv_cnnp_model_copy_name(&model_parameter->super, name);
  model_parameter->init_bound = init_bound; // Without this, cccalloc leaves init_bound at 0 and init_states / copy never see the requested bound.
  model_parameter->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
  model_parameter->weights.graph = 0;
  model_parameter->weights_params = params;
  return (ccv_cnnp_model_t*)model_parameter;
}

static ccv_cnnp_model_t* _ccv_cnnp_parameter_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_parameter_t* const self = (const ccv_cnnp_model_parameter_t*)super;
  return ccv_cnnp_parameter(self->weights_params, self->init_bound, self->super.is_trainable, self->super.name);
}

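/* Illustrative sketch: a 64x64 trainable weight exposed as a model output.
 * CPU_TENSOR_NHWC is the ccv_nnc_easy.h parameter helper; the shape and the
 * 0.1 bound (which selects the uniform-initializer branch of init_states
 * above) are assumptions for the example. */
static ccv_cnnp_model_t* _example_weight(void)
{
  return ccv_cnnp_parameter(CPU_TENSOR_NHWC(32F, 64, 64), 0.1, 1, "weight");
}
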
// MARK - Scalar Layer

typedef struct {
  ccv_cnnp_model_t super;
  int type;
  int format;
  int datatype;
  float value;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_scalar_t;

static void _ccv_cnnp_scalar_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scalar_build] -\n");
  assert(output_size == 1);
  ccv_cnnp_model_scalar_t* const self = (ccv_cnnp_model_scalar_t*)super;
  ccv_nnc_tensor_param_t params = {
    .type = self->type,
    .format = self->format,
    .datatype = self->datatype,
    .dim = {
      1
    }
  };
  if (input_size > 0)
  {
    ccv_nnc_tensor_param_t input_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
    params.type = input_params.type;
    params.format = input_params.format;
    params.datatype = input_params.datatype;
  }
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_SET_FORWARD(self->value), 0, 0, outputs, 1, 0);
}

static ccv_cnnp_model_t* _ccv_cnnp_scalar_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_scalar_isa = {
  .build = _ccv_cnnp_scalar_build,
  .copy = _ccv_cnnp_scalar_copy,
};

ccv_cnnp_model_t* ccv_cnnp_scalar(const int type, const int format, const int datatype, const float value, const char* const name)
{
  ccv_cnnp_model_scalar_t* const model_scalar = (ccv_cnnp_model_scalar_t*)cccalloc(1, sizeof(ccv_cnnp_model_scalar_t));
  model_scalar->super.isa = &ccv_cnnp_scalar_isa;
  model_scalar->super.input_size = 0;
  model_scalar->super.outputs = &model_scalar->output;
  model_scalar->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_scalar->super, name);
  model_scalar->type = type;
  model_scalar->format = format;
  model_scalar->datatype = datatype;
  model_scalar->value = value;
  return (ccv_cnnp_model_t*)model_scalar;
}

static ccv_cnnp_model_t* _ccv_cnnp_scalar_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_scalar_t* const self = (const ccv_cnnp_model_scalar_t*)super;
  return ccv_cnnp_scalar(self->type, self->format, self->datatype, self->value, self->super.name);
}

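/* Illustrative sketch: a constant 1.0. With no inputs the explicit
 * type/format/datatype below are used; applied to an input instead, the
 * build function above copies that input's type, format, and datatype. */
static ccv_cnnp_model_t* _example_one(void)
{
  return ccv_cnnp_scalar(CCV_TENSOR_CPU_MEMORY, CCV_TENSOR_FORMAT_NHWC, CCV_32F, 1, "one");
}
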
// MARK - Variable Layer

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_param_t params;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_variable_t;

static void _ccv_cnnp_variable_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_variable_build] -\n");
  assert(input_size == 0);
  assert(output_size == 1);
  ccv_cnnp_model_variable_t* const self = (ccv_cnnp_model_variable_t*)super;
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, self->params, 0);
}

static ccv_cnnp_model_t* _ccv_cnnp_variable_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_variable_isa = {
  .build = _ccv_cnnp_variable_build,
  .copy = _ccv_cnnp_variable_copy,
};

ccv_cnnp_model_t* ccv_cnnp_variable(const ccv_nnc_tensor_param_t params, const char* const name)
{
  ccv_cnnp_model_variable_t* const model_variable = (ccv_cnnp_model_variable_t*)cccalloc(1, sizeof(ccv_cnnp_model_variable_t));
  model_variable->super.isa = &ccv_cnnp_variable_isa;
  model_variable->super.input_size = 0;
  model_variable->super.outputs = &model_variable->output;
  model_variable->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_variable->super, name);
  model_variable->params = params;
  return (ccv_cnnp_model_t*)model_variable;
}

static ccv_cnnp_model_t* _ccv_cnnp_variable_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_variable_t* const self = (const ccv_cnnp_model_variable_t*)super;
  return ccv_cnnp_variable(self->params, self->super.name);
}

// MARK - Move Layer

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
} ccv_cnnp_model_move_t;

static void _ccv_cnnp_move_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_move_build] -\n");
  assert(input_size == 2);
  assert(output_size == 1);
  outputs[0] = inputs[1];
  ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, 1, outputs, 1, "move");
}

static ccv_cnnp_model_t* _ccv_cnnp_move_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_move_isa = {
  .build = _ccv_cnnp_move_build,
  .copy = _ccv_cnnp_move_copy,
};

ccv_cnnp_model_t* ccv_cnnp_move(const char* const name)
{
  ccv_cnnp_model_move_t* const model_move = (ccv_cnnp_model_move_t*)cccalloc(1, sizeof(ccv_cnnp_model_move_t));
  model_move->super.isa = &ccv_cnnp_move_isa;
  model_move->super.input_size = 2;
  model_move->super.outputs = &model_move->output;
  model_move->super.output_size = 1;
  ccv_cnnp_model_copy_name(&model_move->super, name);
  return (ccv_cnnp_model_t*)model_move;
}

static ccv_cnnp_model_t* _ccv_cnnp_move_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_move_t* const self = (const ccv_cnnp_model_move_t*)super;
  return ccv_cnnp_move(self->super.name);
}

// MARK - "Making" Contiguous Layer
3826
3827
typedef struct {
3828
  ccv_cnnp_model_t super;
3829
  ccv_nnc_tensor_symbol_t output;
3830
} ccv_cnnp_model_contiguous_t;
3831
3832
static void _ccv_cnnp_contiguous_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3833
5
{
3834
5
  PRINT(CCV_CLI_VERBOSE, "[cnnp_contiguous_build] -\n");
3835
5
  assert(input_size == 1);
3836
5
  assert(output_size == 1);
3837
5
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3838
5
  ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
3839
5
  if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If we are not reshape an alias, it is straightforward.
3840
0
  {
3841
0
    outputs[0] = inputs[0];
3842
0
    return;
3843
0
  }
3844
  // Otherwise, we need to check its stride to know if it is contiguous.
3845
5
  int old_stride[CCV_NNC_MAX_DIM_ALLOC];
3846
5
  ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], 0, old_stride);
3847
  // We identify permute by checking if the stride is not in descending order.
3848
  // This also covered "permute" through reshape, rather than using ccv_cnnp_permute directly.
3849
5
  if (ccv_nnc_is_tensor_stride_packed(old_stride, params.dim))
3850
2
  {
3851
2
    outputs[0] = inputs[0];
3852
2
    return;
3853
2
  }
3854
3
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
3855
3
  ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, 1, outputs, 1, "contiguous");
3856
3
  ccv_nnc_graph_exec_symbol_set_flags(graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
3857
3
}
3858
3859
static ccv_cnnp_model_t* _ccv_cnnp_contiguous_copy(const ccv_cnnp_model_t* const super, void* const context);
3860
3861
static const ccv_cnnp_model_vtab_t ccv_cnnp_contiguous_isa = {
3862
  .build = _ccv_cnnp_contiguous_build,
3863
  .copy = _ccv_cnnp_contiguous_copy,
3864
};
3865
3866
ccv_cnnp_model_t* ccv_cnnp_contiguous(const char* const name)
3867
5
{
3868
5
  ccv_cnnp_model_contiguous_t* const model_contiguous = (ccv_cnnp_model_contiguous_t*)cccalloc(1, sizeof(ccv_cnnp_model_contiguous_t));
3869
5
  model_contiguous->super.isa = &ccv_cnnp_contiguous_isa;
3870
5
  model_contiguous->super.input_size = 1;
3871
5
  model_contiguous->super.outputs = &model_contiguous->output;
3872
5
  model_contiguous->super.output_size = 1;
3873
5
  ccv_cnnp_model_copy_name(&model_contiguous->super, name);
3874
5
  return (ccv_cnnp_model_t*)model_contiguous;
3875
5
}
3876
3877
static ccv_cnnp_model_t* _ccv_cnnp_contiguous_copy(const ccv_cnnp_model_t* const super, void* const context)
3878
0
{
3879
0
  const ccv_cnnp_model_contiguous_t* const self = (const ccv_cnnp_model_contiguous_t*)super;
3880
0
  return ccv_cnnp_contiguous(self->super.name);
3881
0
}
3882
3883
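/* Illustrative sketch (not from the instrumented file): `permuted` stands in
 * for any upstream model I/O whose strides may no longer be packed, e.g. the
 * result of a reshape that permutes axes. Per the stride check above, the
 * layer passes the input through untouched whenever no copy is needed. */
static ccv_cnnp_model_io_t _example_make_packed(const ccv_cnnp_model_io_t permuted)
{
  return ccv_cnnp_model_apply(ccv_cnnp_contiguous("contiguous"), MODEL_IO_LIST(permuted));
}
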
// MARK - "Making" Copy Layer
3884
3885
typedef struct {
3886
  ccv_cnnp_model_t super;
3887
  ccv_nnc_tensor_symbol_t output;
3888
} ccv_cnnp_model_copy_t;
3889
3890
static void _ccv_cnnp_copy_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
3891
0
{
3892
0
  PRINT(CCV_CLI_VERBOSE, "[cnnp_copy_build] -\n");
3893
0
  assert(input_size == 1);
3894
0
  assert(output_size == 1);
3895
0
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
3896
0
  ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
3897
0
  if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If we are not reshape an alias, it is straightforward.
3898
0
  {
3899
0
    outputs[0] = inputs[0];
3900
0
    return;
3901
0
  }
3902
0
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
3903
0
  ccv_nnc_graph_exec_symbol_t make_contiguous = ccv_nnc_graph_exec_symbol_new(graph, CMD_FORMAT_TRANSFORM_FORWARD(), inputs, 1, outputs, 1, "contiguous");
3904
0
  ccv_nnc_graph_exec_symbol_set_flags(graph, make_contiguous, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
3905
0
}
3906
3907
static ccv_cnnp_model_t* _ccv_cnnp_copy_copy(const ccv_cnnp_model_t* const super, void* const context);
3908
3909
static const ccv_cnnp_model_vtab_t ccv_cnnp_copy_isa = {
3910
  .build = _ccv_cnnp_copy_build,
3911
  .copy = _ccv_cnnp_copy_copy,
3912
};
3913
3914
ccv_cnnp_model_t* ccv_cnnp_copy(const char* const name)
3915
0
{
3916
0
  ccv_cnnp_model_copy_t* const model_copy = (ccv_cnnp_model_copy_t*)cccalloc(1, sizeof(ccv_cnnp_model_copy_t));
3917
0
  model_copy->super.isa = &ccv_cnnp_copy_isa;
3918
0
  model_copy->super.input_size = 1;
3919
0
  model_copy->super.outputs = &model_copy->output;
3920
0
  model_copy->super.output_size = 1;
3921
0
  ccv_cnnp_model_copy_name(&model_copy->super, name);
3922
0
  return (ccv_cnnp_model_t*)model_copy;
3923
0
}
3924
3925
static ccv_cnnp_model_t* _ccv_cnnp_copy_copy(const ccv_cnnp_model_t* const super, void* const context)
3926
0
{
3927
0
  const ccv_cnnp_model_copy_t* const self = (const ccv_cnnp_model_copy_t*)super;
3928
0
  return ccv_cnnp_copy(self->super.name);
3929
0
}
3930
3931
// MARK - Scaled-Dot Product Attention Layer

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  ccv_nnc_tensor_symbol_t weights;
  ccv_nnc_tensor_symbol_t bias;
  float scale;
  int is_causal;
  int has_attn_mask;
  int flags;
  int fused_unify_head_weights;
  int no_bias;
} ccv_cnnp_model_scaled_dot_product_attention_t;

static void _ccv_cnnp_scaled_dot_product_attention_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scaled_dot_product_attention_build] -\n");
  assert(input_size == 3 || input_size == 4);
  assert(output_size == 1);
  ccv_cnnp_model_scaled_dot_product_attention_t* const self = (ccv_cnnp_model_scaled_dot_product_attention_t*)super;
  const ccv_nnc_tensor_param_t q_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  const ccv_nnc_tensor_param_t k_params = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
  const ccv_nnc_tensor_param_t v_params = ccv_nnc_tensor_symbol_params(graph, inputs[2]);
  const int v_nd = ccv_nnc_tensor_nd(v_params.dim);
  assert(v_nd == 3 || v_nd == 4);
  const int hEv = (v_nd == 3 ? 1 : v_params.dim[2]) * v_params.dim[v_nd - 1];
  ccv_nnc_tensor_param_t weights_params = q_params;
  memset(weights_params.dim, 0, sizeof(weights_params.dim));
  weights_params.dim[0] = hEv;
  weights_params.dim[1] = hEv;
  ccv_nnc_tensor_param_t bias_params = q_params;
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
  bias_params.dim[0] = hEv;
  ccv_nnc_cmd_t cmd = {0};
  cmd.cmd = CCV_NNC_SCALED_DOT_PRODUCT_ATTENTION_FORWARD;
  cmd.info.scaled_dot_product_attention.scale = self->scale;
  cmd.info.scaled_dot_product_attention.is_causal = self->is_causal;
  cmd.info.scaled_dot_product_attention.flags = self->flags;
  ccv_nnc_tensor_param_t output_params[3];
  ccv_nnc_tensor_symbol_t output;
  ccv_nnc_tensor_symbol_t saved_softmax_lse;
  ccv_nnc_tensor_symbol_t saved_v_proj = NO_TENSOR_SYMBOL;
  ccv_nnc_tensor_symbol_t attn_mask = NO_TENSOR_SYMBOL;
  ccv_nnc_tensor_symbol_t weights = NO_TENSOR_SYMBOL;
  ccv_nnc_tensor_symbol_t bias = NO_TENSOR_SYMBOL;
  if (self->has_attn_mask)
    attn_mask = inputs[3];
  if (self->fused_unify_head_weights)
  {
    if (!self->weights.graph)
      self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
    weights = self->weights;
    if (!self->no_bias)
    {
      if (!self->bias.graph)
        self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
      bias = self->bias;
    }
    ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
        q_params,
        k_params,
        v_params,
        (ccv_nnc_tensor_param_t){},
        weights_params,
        bias_params,
      }, 6, ccv_nnc_no_hint, output_params, 3);
    output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
    saved_softmax_lse = ccv_nnc_tensor_symbol_new(graph, output_params[1], 0);
    saved_v_proj = ccv_nnc_tensor_symbol_new(graph, output_params[2], 0);
  } else {
    ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
        q_params,
        k_params,
        v_params,
      }, 3, ccv_nnc_no_hint, output_params, 2);
    output = ccv_nnc_tensor_symbol_new(graph, output_params[0], 0);
    saved_softmax_lse = ccv_nnc_tensor_symbol_new(graph, output_params[1], 0);
  }
  ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], inputs[1], inputs[2], attn_mask, weights, bias), TENSOR_SYMBOL_LIST(output, saved_softmax_lse, saved_v_proj), "scaled_dot_product_attention");
  outputs[0] = output;
}

static void _ccv_cnnp_scaled_dot_product_attention_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
{
  ccv_cnnp_model_scaled_dot_product_attention_t* const self = (ccv_cnnp_model_scaled_dot_product_attention_t*)super;
  if (self->weights.graph)
  {
    assert(self->fused_unify_head_weights);
    const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
    const int c = weight_params.dim[1];
    const float std = sqrtf(2) / sqrtf(c);
    const float bound = sqrtf(3) * std;
    initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->weights);
    if (self->bias.graph)
      initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
  }
}

static void _ccv_cnnp_scaled_dot_product_attention_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
{
  ccv_cnnp_model_scaled_dot_product_attention_t* const self = (ccv_cnnp_model_scaled_dot_product_attention_t*)super;
  if (self->weights.graph)
  {
    assert(self->fused_unify_head_weights);
    add_to_array(parameters, self->weights, is_trainable);
    if (self->bias.graph)
      add_to_array(parameters, self->bias, is_trainable);
  }
}

static ccv_cnnp_model_t* _ccv_cnnp_scaled_dot_product_attention_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_scaled_dot_product_attention_isa = {
  .build = _ccv_cnnp_scaled_dot_product_attention_build,
  .copy = _ccv_cnnp_scaled_dot_product_attention_copy,
};

static const ccv_cnnp_model_vtab_t ccv_cnnp_scaled_dot_product_attention_fused_isa = {
  .build = _ccv_cnnp_scaled_dot_product_attention_build,
  .init_states = _ccv_cnnp_scaled_dot_product_attention_init_states,
  .add_to_parameter = _ccv_cnnp_scaled_dot_product_attention_add_to_parameter,
  .copy = _ccv_cnnp_scaled_dot_product_attention_copy,
};

ccv_cnnp_model_t* ccv_cnnp_scaled_dot_product_attention(const float scale, const int is_causal, const int has_attn_mask, const int flags, const int fused_unify_head_weights, const int no_bias, const int is_trainable, const char* const name)
{
  ccv_cnnp_model_scaled_dot_product_attention_t* const model_scaled_dot_product_attention = (ccv_cnnp_model_scaled_dot_product_attention_t*)cccalloc(1, sizeof(ccv_cnnp_model_scaled_dot_product_attention_t));
  model_scaled_dot_product_attention->super.isa = fused_unify_head_weights ? &ccv_cnnp_scaled_dot_product_attention_fused_isa : &ccv_cnnp_scaled_dot_product_attention_isa;
  model_scaled_dot_product_attention->super.input_size = has_attn_mask ? 4 : 3;
  model_scaled_dot_product_attention->super.outputs = &model_scaled_dot_product_attention->output;
  model_scaled_dot_product_attention->super.output_size = 1;
  model_scaled_dot_product_attention->super.is_trainable = is_trainable;
  ccv_cnnp_model_copy_name(&model_scaled_dot_product_attention->super, name);
  model_scaled_dot_product_attention->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
  model_scaled_dot_product_attention->weights.graph = 0;
  model_scaled_dot_product_attention->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
  model_scaled_dot_product_attention->bias.graph = 0;
  model_scaled_dot_product_attention->scale = scale;
  model_scaled_dot_product_attention->is_causal = is_causal;
  model_scaled_dot_product_attention->has_attn_mask = has_attn_mask;
  model_scaled_dot_product_attention->flags = flags;
  model_scaled_dot_product_attention->fused_unify_head_weights = fused_unify_head_weights;
  model_scaled_dot_product_attention->no_bias = no_bias;
  return (ccv_cnnp_model_t*)model_scaled_dot_product_attention;
}

static ccv_cnnp_model_t* _ccv_cnnp_scaled_dot_product_attention_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_scaled_dot_product_attention_t* const self = (const ccv_cnnp_model_scaled_dot_product_attention_t*)super;
  return ccv_cnnp_scaled_dot_product_attention(self->scale, self->is_causal, self->has_attn_mask, self->flags, self->fused_unify_head_weights, self->no_bias, self->super.is_trainable, self->super.name);
}

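/* Illustrative sketch: causal attention over q/k/v with no mask input and no
 * fused output projection. The scale 1/sqrt(head_dim) for an assumed head
 * dimension of 64 is the conventional choice, not something this listing
 * prescribes. */
static ccv_cnnp_model_t* _example_attention(void)
{
  return ccv_cnnp_scaled_dot_product_attention(0.125 /* 1/sqrt(64) */, 1 /* is_causal */,
    0 /* has_attn_mask */, 0 /* flags */, 0 /* fused_unify_head_weights */,
    1 /* no_bias */, 0 /* is_trainable */, "attention");
}
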
// MARK - Debug Layer

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t output;
  ccv_cnnp_model_debug_f debugger;
  ccv_cnnp_model_debug_context_deinit_f debug_deinit;
  ccv_cnnp_model_debug_context_copy_f debug_copy;
  void* debug_context;
} ccv_cnnp_model_debug_t;

static int _ccv_cnnp_debug_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
{
  if (cmd.cmd == CCV_NNC_CUSTOM_BACKWARD)
  {
    assert(0 && "don't support debug backward pass yet");
  }
  ccv_cnnp_model_debug_t* const self = (ccv_cnnp_model_debug_t*)cmd.data;
  self->debugger(inputs, input_size, stream_context, self->debug_context);
  return CCV_NNC_EXEC_SUCCESS;
}

static ccv_nnc_cmd_vtab_t ccv_cnnp_debug_exec_isa = {
  .exec = _ccv_cnnp_debug_exec
};

static void _ccv_cnnp_debug_build(ccv_cnnp_model_t* const self, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  PRINT(CCV_CLI_VERBOSE, "[cnnp_debug_build] -\n");
  assert(input_size >= 1);
  assert(output_size == 1);
  ccv_nnc_tensor_symbol_t to = ccv_nnc_tensor_symbol_alias_to(graph, inputs[0]);
  ccv_nnc_tensor_param_t output_params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  if (to.d == CCV_NNC_NO_TENSOR_SYMBOL) // If the input is not an alias, it is straightforward.
  {
    int ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};
    int stride[CCV_NNC_MAX_DIM_ALLOC];
    ccv_nnc_tensor_get_stride(output_params.dim, stride);
    outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, inputs[0], ofs, stride, output_params, 0);
  } else {
    int old_ofs[CCV_NNC_MAX_DIM_ALLOC];
    int old_stride[CCV_NNC_MAX_DIM_ALLOC];
    ccv_nnc_tensor_symbol_alias_params(graph, inputs[0], old_ofs, old_stride);
    outputs[0] = ccv_nnc_tensor_symbol_alias_new(graph, to, old_ofs, old_stride, output_params, 0);
  }
  ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, (ccv_nnc_cmd_vtab_t*)&ccv_cnnp_debug_exec_isa, (ccv_nnc_cmd_param_t){}, 0);
  cmd.data = self;
  ccv_nnc_graph_exec_symbol_t make_debug = ccv_nnc_graph_exec_symbol_new(graph, cmd, inputs, input_size, outputs, 1, "debug");
  // Disable any optimizations.
  ccv_nnc_graph_exec_symbol_set_flags(graph, make_debug, CCV_NNC_GRAPH_EXEC_DISABLE_OPT);
}

static void _ccv_cnnp_debug_deinit(ccv_cnnp_model_t* const super)
{
  const ccv_cnnp_model_debug_t* const self = (const ccv_cnnp_model_debug_t*)super;
  if (self->debug_deinit && self->debug_context)
    self->debug_deinit(self->debug_context);
}

static ccv_cnnp_model_t* _ccv_cnnp_debug_copy(const ccv_cnnp_model_t* const super, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_debug_isa = {
  .build = _ccv_cnnp_debug_build,
  .deinit = _ccv_cnnp_debug_deinit,
  .copy = _ccv_cnnp_debug_copy,
};

ccv_cnnp_model_t* ccv_cnnp_debug(ccv_cnnp_model_debug_f func, void* const context, ccv_cnnp_model_debug_context_deinit_f deinit, ccv_cnnp_model_debug_context_copy_f copy, const char* const name)
{
  ccv_cnnp_model_debug_t* const model_debug = (ccv_cnnp_model_debug_t*)cccalloc(1, sizeof(ccv_cnnp_model_debug_t));
  model_debug->super.isa = &ccv_cnnp_debug_isa;
  model_debug->super.input_size = 0;
  model_debug->super.outputs = &model_debug->output;
  model_debug->super.output_size = 1;
  model_debug->debugger = func;
  model_debug->debug_context = context;
  model_debug->debug_deinit = deinit;
  model_debug->debug_copy = copy;
  ccv_cnnp_model_copy_name(&model_debug->super, name);
  return (ccv_cnnp_model_t*)model_debug;
}

static ccv_cnnp_model_t* _ccv_cnnp_debug_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  const ccv_cnnp_model_debug_t* const self = (const ccv_cnnp_model_debug_t*)super;
  void* debug_context = self->debug_context;
  if (self->debug_copy && self->debug_context)
    debug_context = self->debug_copy(self->debug_context);
  return ccv_cnnp_debug(self->debugger, debug_context, self->debug_deinit, self->debug_copy, self->super.name);
}

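/* Illustrative sketch: a debugger callback with the signature that
 * _ccv_cnnp_debug_exec above invokes (inputs, input_size, stream context,
 * user context). Reading data.f32 assumes CPU-resident tensors; the tag
 * string and printf usage are made up for the example. */
static void _example_print_first(ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_stream_context_t* const stream_context, void* const context)
{
  printf("%s: %f\n", (const char*)context, inputs[0]->data.f32[0]);
}
/* Attach with: ccv_cnnp_debug(_example_print_first, (void*)"after_gemm", 0, 0, "debug") */
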
/// MARK - Sort layer.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t outputs[2];
  int along_axis;
  int descending;
} ccv_cnnp_model_sort_t;

static void _ccv_cnnp_sort_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_sort_t* const self = (ccv_cnnp_model_sort_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_sort_build] - along_axis: %d, descending: %d\n", self->along_axis, self->descending);
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  assert(output_size == 2);
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
  params.datatype = CCV_32S;
  outputs[1] = ccv_nnc_tensor_symbol_new(graph, params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_SORT_FORWARD(self->along_axis, self->descending), inputs, input_size, outputs, output_size, "sort");
}

static ccv_cnnp_model_t* _ccv_cnnp_sort_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_sort_isa = {
  .build = _ccv_cnnp_sort_build,
  .copy = _ccv_cnnp_sort_copy,
};

ccv_cnnp_model_t* ccv_cnnp_sort(const int along_axis, const int descending, const char* const name)
{
  ccv_cnnp_model_sort_t* const model_sort = (ccv_cnnp_model_sort_t*)cccalloc(1, sizeof(ccv_cnnp_model_sort_t));
  model_sort->super.isa = &ccv_cnnp_sort_isa;
  model_sort->super.input_size = 0;
  model_sort->super.outputs = model_sort->outputs;
  model_sort->super.output_size = 2;
  model_sort->along_axis = along_axis;
  model_sort->descending = descending;
  ccv_cnnp_model_copy_name(&model_sort->super, name);
  return (ccv_cnnp_model_t*)model_sort;
}

static ccv_cnnp_model_t* _ccv_cnnp_sort_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  ccv_cnnp_model_sort_t* const self = (ccv_cnnp_model_sort_t*)super;
  return ccv_cnnp_sort(self->along_axis, self->descending, self->super.name);
}

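/* Illustrative sketch: sorting along axis 0 in descending order. Per the
 * build function above, output 0 carries the sorted values and output 1 the
 * matching CCV_32S indices. */
static ccv_cnnp_model_t* _example_sort(void)
{
  return ccv_cnnp_sort(0 /* along_axis */, 1 /* descending */, "sort");
}
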
/// MARK - Partition layer.

typedef struct {
  ccv_cnnp_model_t super;
  ccv_nnc_tensor_symbol_t outputs[2];
  int kth;
  int along_axis;
  int descending;
} ccv_cnnp_model_partition_t;

static void _ccv_cnnp_partition_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
{
  ccv_cnnp_model_partition_t* const self = (ccv_cnnp_model_partition_t*)super;
  PRINT(CCV_CLI_VERBOSE, "[cnnp_partition_build] - kth: %d, along_axis: %d, descending: %d\n", self->kth, self->along_axis, self->descending);
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
  assert(output_size == 2);
  if (self->kth > 0)
    params.dim[self->along_axis] = self->kth;
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
  params.datatype = CCV_32S;
  outputs[1] = ccv_nnc_tensor_symbol_new(graph, params, 0);
  ccv_nnc_graph_exec_symbol_new(graph, CMD_PARTITION_FORWARD(self->kth, self->along_axis, self->descending), inputs, input_size, outputs, output_size, "partition");
}

static ccv_cnnp_model_t* _ccv_cnnp_partition_copy(const ccv_cnnp_model_t* const self, void* const context);

static const ccv_cnnp_model_vtab_t ccv_cnnp_partition_isa = {
  .build = _ccv_cnnp_partition_build,
  .copy = _ccv_cnnp_partition_copy,
};

ccv_cnnp_model_t* ccv_cnnp_partition(const int kth, const int along_axis, const int descending, const char* const name)
{
  ccv_cnnp_model_partition_t* const model_partition = (ccv_cnnp_model_partition_t*)cccalloc(1, sizeof(ccv_cnnp_model_partition_t));
  model_partition->super.isa = &ccv_cnnp_partition_isa;
  model_partition->super.input_size = 0;
  model_partition->super.outputs = model_partition->outputs;
  model_partition->super.output_size = 2;
  model_partition->kth = kth;
  model_partition->along_axis = along_axis;
  model_partition->descending = descending;
  ccv_cnnp_model_copy_name(&model_partition->super, name);
  return (ccv_cnnp_model_t*)model_partition;
}

static ccv_cnnp_model_t* _ccv_cnnp_partition_copy(const ccv_cnnp_model_t* const super, void* const context)
{
  ccv_cnnp_model_partition_t* const self = (ccv_cnnp_model_partition_t*)super;
  return ccv_cnnp_partition(self->kth, self->along_axis, self->descending, self->super.name);
}

4273
/// MARK - Unique consecutive layer.
4274
4275
typedef struct {
4276
  ccv_cnnp_model_t super;
4277
  ccv_nnc_tensor_symbol_t outputs[2];
4278
  int bincount;
4279
} ccv_cnnp_model_unique_consecutive_t;
4280
4281
static void _ccv_cnnp_unique_consecutive_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
4282
1
{
4283
1
  ccv_cnnp_model_unique_consecutive_t* const self = (ccv_cnnp_model_unique_consecutive_t*)super;
4284
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_unique_consecutive_build] - bincount: %d\n", self->bincount);
4285
1
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
4286
1
  assert(output_size == 2);
4287
1
  if (self->bincount > 0)
4288
1
    params.dim[0] = ccv_min(params.dim[0], self->bincount);
4289
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
4290
1
  params.datatype = CCV_32S;
4291
1
  outputs[1] = ccv_nnc_tensor_symbol_new(graph, params, 0);
4292
1
  ccv_nnc_graph_exec_symbol_new(graph, CMD_UNIQUE_CONSECUTIVE_FORWARD(self->bincount), inputs, input_size, outputs, output_size, "unique_consecutive");
4293
1
}
4294
4295
static ccv_cnnp_model_t* _ccv_cnnp_unique_consecutive_copy(const ccv_cnnp_model_t* const self, void* const context);
4296
4297
static const ccv_cnnp_model_vtab_t ccv_cnnp_unique_consecutive_isa = {
4298
  .build = _ccv_cnnp_unique_consecutive_build,
4299
  .copy = _ccv_cnnp_unique_consecutive_copy,
4300
};
4301
4302
ccv_cnnp_model_t* ccv_cnnp_unique_consecutive(const int bincount, const char* const name)
4303
1
{
4304
1
  ccv_cnnp_model_unique_consecutive_t* const model_unique_consecutive = (ccv_cnnp_model_unique_consecutive_t*)cccalloc(1, sizeof(ccv_cnnp_model_unique_consecutive_t));
4305
1
  model_unique_consecutive->super.isa = &ccv_cnnp_unique_consecutive_isa;
4306
1
  model_unique_consecutive->super.input_size = 0;
4307
1
  model_unique_consecutive->super.outputs = model_unique_consecutive->outputs;
4308
1
  model_unique_consecutive->super.output_size = 2;
4309
1
  model_unique_consecutive->bincount = bincount;
4310
1
  ccv_cnnp_model_copy_name(&model_unique_consecutive->super, name);
4311
1
  return (ccv_cnnp_model_t*)model_unique_consecutive;
4312
1
}
4313
4314
static ccv_cnnp_model_t* _ccv_cnnp_unique_consecutive_copy(const ccv_cnnp_model_t* const super, void* const context)
4315
0
{
4316
0
  ccv_cnnp_model_unique_consecutive_t* const self = (ccv_cnnp_model_unique_consecutive_t*)super;
4317
0
  return ccv_cnnp_unique_consecutive(self->bincount, self->super.name);
4318
0
}
4319
4320
/// MARK - Scatter add layer.
4321
4322
typedef struct {
4323
  ccv_cnnp_model_t super;
4324
  ccv_nnc_tensor_symbol_t output;
4325
  int bincount;
4326
} ccv_cnnp_model_scatter_add_t;
4327
4328
static void _ccv_cnnp_scatter_add_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
4329
1
{
4330
1
  ccv_cnnp_model_scatter_add_t* const self = (ccv_cnnp_model_scatter_add_t*)super;
4331
1
  PRINT(CCV_CLI_VERBOSE, "[cnnp_scatter_add_build] - bincount: %d\n", self->bincount);
4332
1
  ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
4333
1
  assert(output_size == 1);
4334
1
  assert(self->bincount > 0);
4335
1
  params.dim[0] = self->bincount;
4336
1
  outputs[0] = ccv_nnc_tensor_symbol_new(graph, params, 0);
4337
1
  ccv_nnc_graph_exec_symbol_new(graph, CMD_SCATTER_ADD_FORWARD(self->bincount), inputs, input_size, outputs, output_size, "scatter_add");
4338
1
}
4339
4340
static ccv_cnnp_model_t* _ccv_cnnp_scatter_add_copy(const ccv_cnnp_model_t* const self, void* const context);
4341
4342
static const ccv_cnnp_model_vtab_t ccv_cnnp_scatter_add_isa = {
4343
  .build = _ccv_cnnp_scatter_add_build,
4344
  .copy = _ccv_cnnp_scatter_add_copy,
4345
};
4346
4347
ccv_cnnp_model_t* ccv_cnnp_scatter_add(const int bincount, const char* const name)
4348
1
{
4349
1
  assert(bincount > 0);
4350
1
  ccv_cnnp_model_scatter_add_t* const model_scatter_add = (ccv_cnnp_model_scatter_add_t*)cccalloc(1, sizeof(ccv_cnnp_model_scatter_add_t));
4351
1
  model_scatter_add->super.isa = &ccv_cnnp_scatter_add_isa;
4352
1
  model_scatter_add->super.input_size = 0;
4353
1
  model_scatter_add->super.outputs = &model_scatter_add->output;
4354
1
  model_scatter_add->super.output_size = 1;
4355
1
  model_scatter_add->bincount = bincount;
4356
1
  ccv_cnnp_model_copy_name(&model_scatter_add->super, name);
4357
1
  return (ccv_cnnp_model_t*)model_scatter_add;
4358
1
}
4359
4360
static ccv_cnnp_model_t* _ccv_cnnp_scatter_add_copy(const ccv_cnnp_model_t* const super, void* const context)
4361
0
{
4362
0
  ccv_cnnp_model_scatter_add_t* const self = (ccv_cnnp_model_scatter_add_t*)super;
4363
0
  return ccv_cnnp_scatter_add(self->bincount, self->super.name);
4364
0
}
4365
4366
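/* Illustrative sketch: summing rows into 128 buckets keyed by an index
 * input; bincount fixes the first output dimension, as the build function
 * above asserts. Pairing this with unique_consecutive for routing-style
 * workloads is an assumption of this example, not something the listing
 * prescribes. */
static ccv_cnnp_model_t* _example_scatter_add(void)
{
  return ccv_cnnp_scatter_add(128 /* bincount */, "scatter_add");
}
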
// MARK - Segmented Dense Layer
4367
4368
typedef struct {
4369
  ccv_cnnp_model_t super;
4370
  ccv_nnc_tensor_symbol_t output;
4371
  ccv_nnc_tensor_symbol_t weights;
4372
  ccv_nnc_tensor_symbol_t bias;
4373
  int segments;
4374
  int count;
4375
  int no_bias;
4376
  int flags;
4377
} ccv_cnnp_model_segmented_dense_t;
4378
4379
static void _ccv_cnnp_segmented_dense_build(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_nnc_tensor_symbol_t* const inputs, const int input_size, ccv_nnc_tensor_symbol_t* const outputs, const int output_size)
4380
2
{
4381
2
  ccv_cnnp_model_segmented_dense_t* const self = (ccv_cnnp_model_segmented_dense_t*)super;
4382
2
  PRINT(CCV_CLI_VERBOSE, "[cnnp_segmented_dense_build] -\n");
4383
2
  assert(input_size == 3);
4384
2
  assert(output_size == 1);
4385
2
  const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(graph, inputs[0]);
4386
2
  const ccv_nnc_tensor_param_t indices_params = ccv_nnc_tensor_symbol_params(graph, inputs[1]);
4387
2
  const ccv_nnc_tensor_param_t counts_params = ccv_nnc_tensor_symbol_params(graph, inputs[2]);
4388
2
  ccv_nnc_tensor_param_t weights_params = params;
4389
2
  memset(weights_params.dim, 0, sizeof(weights_params.dim));
4390
2
  weights_params.dim[0] = self->segments;
4391
2
  weights_params.dim[1] = self->count;
4392
2
  weights_params.dim[2] = params.dim[ccv_nnc_tensor_nd(params.dim) - 1];
4393
2
  if (!self->weights.graph)
4394
2
    self->weights = ccv_nnc_tensor_symbol_new(graph, weights_params, "weights");
4395
2
  assert(self->weights.graph == graph);
4396
2
  ccv_nnc_tensor_param_t bias_params = params;
4397
2
  memset(bias_params.dim, 0, sizeof(bias_params.dim));
4398
2
  bias_params.dim[0] = self->segments;
4399
2
  bias_params.dim[1] = self->count;
4400
2
  ccv_nnc_cmd_t cmd = {0};
4401
2
  cmd.cmd = CCV_NNC_SEGMENTED_GEMM_FORWARD;
4402
2
  cmd.info.blas.a[0] = 1;
4403
2
  cmd.info.blas.a[1] = 1;
4404
2
  cmd.info.blas.transpose_b[0] = 1;
4405
2
  cmd.info.blas.transpose_b[1] = 2;
4406
2
  cmd.info.blas.flags = self->flags;
4407
2
  ccv_nnc_tensor_param_t output_params;
4408
2
  ccv_nnc_hint_tensor_auto(cmd, (ccv_nnc_tensor_param_t []){
4409
2
      params, indices_params, counts_params,
4410
2
      weights_params,
4411
2
      bias_params,
4412
2
    }, 5, ccv_nnc_no_hint, &output_params, 1);
4413
2
  const ccv_nnc_tensor_symbol_t output = ccv_nnc_tensor_symbol_new(graph, output_params, 0);
4414
2
  if (self->no_bias)
4415
1
    ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], inputs[1], inputs[2], self->weights), TENSOR_SYMBOL_LIST(output), "segmented_dense");
4416
1
  else {
4417
1
    if (!self->bias.graph)
4418
1
      self->bias = ccv_nnc_tensor_symbol_new(graph, bias_params, "bias");
4419
1
    ccv_nnc_graph_exec_symbol_new(graph, cmd, TENSOR_SYMBOL_LIST(inputs[0], inputs[1], inputs[2], self->weights, self->bias), TENSOR_SYMBOL_LIST(output), "segmented_dense");
4420
1
  }
4421
2
  outputs[0] = output;
4422
2
}
4423
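// Editorial sketch of the math the build above encodes (assumptions: row-major
// layout, and that inputs[1] / inputs[2] route input rows to segments via
// indices and per-segment counts): the weights are built as
// [segments, count, input_dim] and transpose_b swaps dims 1 and 2, so a row x
// assigned to segment s computes y = x * W[s]^T + bias[s]. A scalar reference
// for one such row:
static void _example_segmented_dense_row(const float* const x, const int input_dim, const float* const W, const float* const bias, const int s, const int count, float* const y)
{
  int i, j;
  for (i = 0; i < count; i++)
  {
    float acc = bias ? bias[s * count + i] : 0; // bias is laid out as [segments, count].
    for (j = 0; j < input_dim; j++)
      acc += x[j] * W[(s * count + i) * input_dim + j]; // out_i = sum_j x_j * W[s][i][j]
    y[i] = acc;
  }
}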
4424
static void _ccv_cnnp_segmented_dense_init_states(ccv_cnnp_model_t* const super, ccv_nnc_symbolic_graph_t* const graph, const ccv_cnnp_state_initializer_f initializer, void* const context)
4425
0
{
4426
0
  ccv_cnnp_model_segmented_dense_t* const self = (ccv_cnnp_model_segmented_dense_t*)super;
4427
0
  const ccv_nnc_tensor_param_t weight_params = ccv_nnc_tensor_symbol_params(graph, self->weights);
4428
0
  const int c = weight_params.dim[1];
4429
0
  const float std = sqrtf(2) / sqrtf(c);
4430
0
  const float bound = sqrtf(3) * std;
4431
0
  initializer(context, CMD_RANDOM_UNIFORM_FORWARD(-bound, bound), ccv_nnc_no_hint, 0, 0, self->weights);
4432
0
  if (self->bias.graph)
4433
0
    initializer(context, CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, self->bias);
4434
0
}
4435
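// Editorial note on the initializer above: sampling uniformly from
// [-bound, bound] gives variance bound^2 / 3, i.e. a standard deviation of
// bound / sqrt(3). With bound = sqrt(3) * std and std = sqrt(2 / c), the
// weights therefore end up with standard deviation sqrt(2 / c), the
// He/Kaiming scale. Note that c is weight_params.dim[1], which in the
// [segments, count, input_dim] layout built above is the per-segment output
// count.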
4436
static void _ccv_cnnp_segmented_dense_add_to_parameter(ccv_cnnp_model_t* const super, const ccv_cnnp_add_to_array_f add_to_array, void* const parameters, const int is_trainable)
4437
2
{
4438
2
  ccv_cnnp_model_segmented_dense_t* const self = (ccv_cnnp_model_segmented_dense_t*)super;
4439
2
  add_to_array(parameters, self->weights, is_trainable);
4440
2
  if (self->bias.graph)
4441
1
    add_to_array(parameters, self->bias, is_trainable);
4442
2
}
4443
4444
static ccv_cnnp_model_t* _ccv_cnnp_segmented_dense_copy(const ccv_cnnp_model_t* const super, void* const context);
4445
4446
static const ccv_cnnp_model_vtab_t ccv_cnnp_segmented_dense_isa = {
4447
  .build = _ccv_cnnp_segmented_dense_build,
4448
  .init_states = _ccv_cnnp_segmented_dense_init_states,
4449
  .add_to_parameter = _ccv_cnnp_segmented_dense_add_to_parameter,
4450
  .copy = _ccv_cnnp_segmented_dense_copy,
4451
};
4452
4453
ccv_cnnp_model_t* ccv_cnnp_segmented_dense(const int segments, const int count, const int no_bias, const int flags, const int is_trainable, const char* const name)
4454
2
{
4455
2
  ccv_cnnp_model_segmented_dense_t* const model_segmented_dense = (ccv_cnnp_model_segmented_dense_t*)cccalloc(1, sizeof(ccv_cnnp_model_segmented_dense_t));
4456
2
  model_segmented_dense->super.isa = &ccv_cnnp_segmented_dense_isa;
4457
2
  model_segmented_dense->super.input_size = 3;
4458
2
  model_segmented_dense->super.outputs = &model_segmented_dense->output;
4459
2
  model_segmented_dense->super.output_size = 1;
4460
2
  model_segmented_dense->super.is_trainable = is_trainable;
4461
2
  ccv_cnnp_model_copy_name(&model_segmented_dense->super, name);
4462
2
  model_segmented_dense->weights.d = CCV_NNC_NO_TENSOR_SYMBOL;
4463
2
  model_segmented_dense->weights.graph = 0;
4464
2
  model_segmented_dense->bias.d = CCV_NNC_NO_TENSOR_SYMBOL;
4465
2
  model_segmented_dense->bias.graph = 0;
4466
2
  model_segmented_dense->segments = segments;
4467
2
  model_segmented_dense->count = count;
4468
2
  model_segmented_dense->no_bias = no_bias;
4469
2
  model_segmented_dense->flags = flags;
4470
2
  return (ccv_cnnp_model_t*)model_segmented_dense;
4471
2
}
4472
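// Editorial usage sketch (assumes only the constructor signature above):
// builds a trainable segmented dense layer with 4 segments of 64 output
// features each, with bias. When built, the model expects three inputs: the
// activations, the gather indices, and the per-segment counts (input_size is
// set to 3 above).
static ccv_cnnp_model_t* _example_make_segmented_dense(void)
{
  return ccv_cnnp_segmented_dense(4 /* segments */, 64 /* count */, 0 /* no_bias */, 0 /* flags */, 1 /* is_trainable */, "experts");
}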
4473
static ccv_cnnp_model_t* _ccv_cnnp_segmented_dense_copy(const ccv_cnnp_model_t* const super, void* const context)
4474
0
{
4475
0
  const ccv_cnnp_model_segmented_dense_t* const self = (const ccv_cnnp_model_segmented_dense_t*)super;
4476
0
  return ccv_cnnp_segmented_dense(self->segments, self->count, self->no_bias, self->flags, self->super.is_trainable, self->super.name);
4477
0
}