Coverage Report

Created: 2019-07-03 22:50

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/util/ccv_nnc_util_cpu_ref.c
Line |  Count | Source
  1 |        | #include <ccv.h>
  2 |        | #include <ccv_internal.h>
  3 |        | #include <nnc/ccv_nnc.h>
  4 |        | #include <nnc/ccv_nnc_easy.h>
  5 |        | #include <nnc/ccv_nnc_internal.h>
  6 |        | #ifdef USE_OPENMP
  7 |        | #include <omp.h>
  8 |        | #endif
  9 |        | #ifdef USE_DISPATCH
 10 |        | #include <dispatch/dispatch.h>
 11 |        | #endif
 12 |        | #include "../_ccv_nnc_cpu_ref.h"
 13 |        |
 14 |        | void _ccv_nnc_tensor_transfer_cpu_ref(const ccv_nnc_tensor_view_t* const a, ccv_nnc_tensor_view_t* const b)
 15 |  10.3k | {
 16 |  10.3k |   // Assuming this is float 32.
 17 |  10.3k |   assert(a->info.datatype == b->info.datatype);
 18 |  10.3k |   if (!CCV_IS_TENSOR_VIEW(a) && !CCV_IS_TENSOR_VIEW(b))
 19 |  10.3k |   {
 20 |  10.3k |     // Super optimal case, just do memcpy.
 21 |  10.3k |     memcpy(b->data.u8, a->data.u8, ccv_nnc_tensor_count(a->info) * CCV_GET_DATA_TYPE_SIZE(a->info.datatype));
 22 |  10.3k |     return;
 23 |  10.3k |   }
 24 |      5 |   int dim[CCV_NNC_MAX_DIM + 2];
 25 |      5 |   int ainc[CCV_NNC_MAX_DIM + 2];
 26 |      5 |   int binc[CCV_NNC_MAX_DIM + 2];
 27 |      5 |   ccv_nnc_tensor_view_get_dim(a, dim);
 28 |      5 |   assert(ccv_nnc_tensor_view_check_dim(b, dim));
 29 |      5 |   ccv_nnc_tensor_view_get_inc(a, ainc);
 30 |      5 |   ccv_nnc_tensor_view_get_inc(b, binc);
 31 |      5 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 32 |      5 |   int i[CCV_NNC_MAX_DIM + 2];
 33 |      5 |   float* ap = a->data.f32;
 34 |      5 |   float* bp = b->data.f32;
 35 |      5 |   if (ainc[3] == dim[3] && binc[3] == dim[3])
 36 |      0 |   {
 37 |      0 |     // Special casing if the ainc[3] is the same as dim[3] (do memcpy for the last two dim)
 38 |      0 |     for (i[0] = 0; i[0] < dim[0]; i[0]++)
 39 |      0 |     {
 40 |      0 |       for (i[1] = 0; i[1] < dim[1]; i[1]++)
 41 |      0 |       {
 42 |      0 |         memcpy(bp, ap, dim[2] * dim[3] * sizeof(float));
 43 |      0 |         ap += ainc[2] * ainc[3];
 44 |      0 |         bp += binc[2] * binc[3];
 45 |      0 |       }
 46 |      0 |       ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
 47 |      0 |       bp += (binc[1] - dim[1]) * binc[2] * binc[3];
 48 |      0 |     }
 49 |      0 |     return;
 50 |      0 |   }
 51 |      5 |   // Non-optimal case, need to do skip copy.
 52 |     10 |   for (i[0] = 0; i[0] < dim[0]; i[0]++)
 53 |      5 |   {
 54 |     11 |     for (i[1] = 0; i[1] < dim[1]; i[1]++)
 55 |      6 |     {
 56 |     16 |       for (i[2] = 0; i[2] < dim[2]; i[2]++)
 57 |     10 |       {
 58 |     10 |         memcpy(bp, ap, dim[3] * sizeof(float));
 59 |     10 |         ap += ainc[3];
 60 |     10 |         bp += binc[3];
 61 |     10 |       }
 62 |      6 |       ap += (ainc[2] - dim[2]) * ainc[3];
 63 |      6 |       bp += (binc[2] - dim[2]) * binc[3];
 64 |      6 |     }
 65 |      5 |     ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
 66 |      5 |     bp += (binc[1] - dim[1]) * binc[2] * binc[3];
 67 |      5 |   }
 68 |      5 | }
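Line 18 above picks between the two copy strategies: when neither tensor is a tensor view, line 21 moves the whole buffer with one memcpy; otherwise the skip-copy loops from line 52 onward copy one innermost row at a time (the contiguous-view fast path at lines 36-50 was not exercised in this run). Below is a minimal sketch of reaching the memcpy path through the public command API; it assumes the CMD_DATA_TRANSFER_FORWARD(), TENSOR_LIST() and ccv_nnc_no_hint conveniences from nnc/ccv_nnc_easy.h behave as in the ccv test suite, and the 2x3x4x5 shape is illustrative only.

#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

static void transfer_contiguous_sketch(void)
{
	const ccv_nnc_tensor_param_t params = {
		.type = CCV_TENSOR_CPU_MEMORY,
		.format = CCV_TENSOR_FORMAT_NHWC,
		.datatype = CCV_32F,
		.dim = {2, 3, 4, 5},
	};
	// Two plain (non-view) tensors of identical shape: the backend takes the
	// single-memcpy path (report line 21) and never enters the skip-copy loops.
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, params, 0);
	// Contents of a are left uninitialized here; a real test would fill it first.
	ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0,
		TENSOR_LIST(a), TENSOR_LIST(b), 0);
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
}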
 69 |        |
 70 |        | void _ccv_nnc_tensor_set_cpu_ref(ccv_nnc_tensor_view_t* const a, const float b)
 71 |  3.36k | {
 72 |  3.36k |   // Assuming this is float 32.
 73 |  3.36k |   int dim[CCV_NNC_MAX_DIM + 2];
 74 |  3.36k |   int ainc[CCV_NNC_MAX_DIM + 2];
 75 |  3.36k |   int x;
 76 |  3.36k |   if (!CCV_IS_TENSOR_VIEW(a))
 77 |  3.36k |   {
 78 |  3.36k |     // Super optimal case, just do one for-loop for sum.
 79 |  3.36k |     const int tensor_count = ccv_nnc_tensor_count(a->info);
 80 |  8.43k |     for (x = 0; x < tensor_count; x++)
 81 |  5.06k |       a->data.f32[x] = b;
 82 |  3.36k |     return;
 83 |  3.36k |   }
 84 |      0 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
 85 |      0 |   ccv_nnc_tensor_view_get_dim(a, dim);
 86 |      0 |   ccv_nnc_tensor_view_get_inc(a, ainc);
 87 |      0 |   int i[CCV_NNC_MAX_DIM + 2];
 88 |      0 |   float* ap = a->data.f32;
 89 |      0 |   const int count = dim[2] * dim[3];
 90 |      0 |   if (ainc[3] == dim[3])
 91 |      0 |   {
 92 |      0 |     // Special casing if the ainc[3] is the same as dim[3]
 93 |      0 |     for (i[0] = 0; i[0] < dim[0]; i[0]++)
 94 |      0 |     {
 95 |      0 |       for (i[1] = 0; i[1] < dim[1]; i[1]++)
 96 |      0 |       {
 97 |      0 |         for (x = 0; x < count; x++)
 98 |      0 |           ap[x] = b;
 99 |      0 |         ap += ainc[2] * ainc[3];
100 |      0 |       }
101 |      0 |       ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
102 |      0 |     }
103 |      0 |     return;
104 |      0 |   }
105 |      0 |   // Non-optimal case, need to do skip copy.
106 |      0 |   for (i[0] = 0; i[0] < dim[0]; i[0]++)
107 |      0 |   {
108 |      0 |     for (i[1] = 0; i[1] < dim[1]; i[1]++)
109 |      0 |     {
110 |      0 |       for (i[2] = 0; i[2] < dim[2]; i[2]++)
111 |      0 |       {
112 |      0 |         for (x = 0; x < dim[3]; x++)
113 |      0 |           ap[x] = b;
114 |      0 |         ap += ainc[3];
115 |      0 |       }
116 |      0 |       ap += (ainc[2] - dim[2]) * ainc[3];
117 |      0 |     }
118 |      0 |     ap += (ainc[1] - dim[1]) * ainc[2] * ainc[3];
119 |      0 |   }
120 |      0 | }
121 |        |
122 |        | static int _ccv_nnc_data_transfer(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
123 |  1.22k | {
124 |  1.22k |   assert(output_size <= input_size);
125 |  1.22k |   int i;
126 |  5.10k |   for (i = 0; i < output_size; i++)
127 |  3.87k |   {
128 |  3.87k |     const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[i];
129 |  3.87k |     ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[i];
130 |  3.87k |     if (a != b) // Only do transfer if these are two different tensors.
131 |  3.66k |       _ccv_nnc_tensor_transfer_cpu_ref(a, b);
132 |  3.87k |   }
133 |  1.22k |   return CCV_NNC_EXEC_SUCCESS;
134 |  1.22k | }
135 |        |
136 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_DATA_TRANSFER_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
137 |      1 | {
138 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
139 |      1 |   registry->tensor_datatypes = CCV_32F;
140 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
141 |      1 |   registry->algorithms = 1;
142 |      1 |   registry->exec = _ccv_nnc_data_transfer;
143 |      1 | }
144 |        |
145 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_DATA_TRANSFER_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
146 |      1 | {
147 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
148 |      1 |   registry->tensor_datatypes = CCV_32F;
149 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
150 |      1 |   registry->algorithms = 1;
151 |      1 |   registry->exec = _ccv_nnc_data_transfer;
152 |      1 | }
153 |        |
154 |        | static int _ccv_nnc_set_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
155 |  3.56k | {
156 |  3.56k |   int i;
157 |  3.56k |   if (cmd.info.blas.a[0] == 0)
158 |    404 |     for (i = 0; i < output_size; i++)
159 |    202 |       ccv_nnc_tensor_zero(outputs[i]);
160 |  3.36k |   else
161 |  6.72k |     for (i = 0; i < output_size; i++)
162 |  3.36k |       _ccv_nnc_tensor_set_cpu_ref((ccv_nnc_tensor_view_t*)outputs[i], cmd.info.blas.a[0]);
163 |  3.56k |   return CCV_NNC_EXEC_SUCCESS;
164 |  3.56k | }
165 |        |
166 |        | static int _ccv_nnc_set_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
167 |      0 | {
168 |      0 |   int i;
169 |      0 |   for (i = 0; i < output_size; i++)
170 |      0 |     ccv_nnc_tensor_zero(outputs[i]);
171 |      0 |   return CCV_NNC_EXEC_SUCCESS;
172 |      0 | }
173 |        |
174 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_SET_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
175 |      1 | {
176 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
177 |      1 |   registry->tensor_datatypes = CCV_32F;
178 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
179 |      1 |   registry->algorithms = 1;
180 |      1 |   registry->exec = _ccv_nnc_set_forw;
181 |      1 | }
182 |        |
183 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_SET_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
184 |      1 | {
185 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
186 |      1 |   registry->tensor_datatypes = CCV_32F;
187 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
188 |      1 |   registry->algorithms = 1;
189 |      1 |   registry->exec = _ccv_nnc_set_back;
190 |      1 | }
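In _ccv_nnc_set_forw, cmd.info.blas.a[0] carries the fill value: zero routes every output through ccv_nnc_tensor_zero (lines 158-159), anything else through _ccv_nnc_tensor_set_cpu_ref (lines 161-162), and only plain non-view tensors were hit in this run (the view path from line 84 on is cold). A short sketch that drives both branches, assuming CMD_SET_FORWARD(value) from nnc/ccv_nnc_easy.h populates blas.a[0] with its argument and that the set command takes no inputs; the 4x4 shape is illustrative.

#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

static void set_sketch(void)
{
	ccv_nnc_tensor_t* const t = ccv_nnc_tensor_new(0, (ccv_nnc_tensor_param_t){
		.type = CCV_TENSOR_CPU_MEMORY,
		.format = CCV_TENSOR_FORMAT_NHWC,
		.datatype = CCV_32F,
		.dim = {4, 4},
	}, 0);
	// blas.a[0] == 0: zeroing branch (report lines 158-159).
	ccv_nnc_cmd_exec(CMD_SET_FORWARD(0), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(t), 0);
	// blas.a[0] != 0: _ccv_nnc_tensor_set_cpu_ref branch (report lines 161-162).
	ccv_nnc_cmd_exec(CMD_SET_FORWARD(1.5), ccv_nnc_no_hint, 0, 0, 0, TENSOR_LIST(t), 0);
	ccv_nnc_tensor_free(t);
}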
191 |        |
192 |        | static void _ccv_nnc_tensor_nhwc_nchw(const ccv_nnc_tensor_view_t* a, ccv_nnc_tensor_view_t* b)
193 |      2 | {
194 |      2 |   // Assuming this is float 32.
195 |      2 |   int ainc[CCV_NNC_MAX_DIM + 2];
196 |      2 |   int binc[CCV_NNC_MAX_DIM + 2];
197 |      2 |   int k;
198 |      2 |   // In case it is Toll-free bridged matrix object (NHWC format is possible).
199 |      2 |   const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
200 |      2 |   const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
201 |      2 |   const int a_offset = CCV_NNC_MAX_DIM + 2 - a_nd;
202 |      2 |   assert(a_offset == 0 || a_offset == 1);
203 |      2 |   const int b_offset = CCV_NNC_MAX_DIM + 2 - b_nd;
204 |      2 |   assert(b_offset == 0 || b_offset == 1);
205 |      2 |   ccv_nnc_tensor_view_get_inc(a, ainc);
206 |      2 |   ccv_nnc_tensor_view_get_inc(b, binc);
207 |      2 |   // Comparing N
208 |      2 |   assert((a_offset == 0 ? a->info.dim[0] : 1) == (b_offset == 0 ? b->info.dim[0] : 1));
209 |      2 |   const int n = (a_offset == 0 ? a->info.dim[0] : 1);
210 |      2 |   // Comparing C
211 |      2 |   assert(a->info.dim[a_nd - 1] == b->info.dim[1 - b_offset]);
212 |      2 |   const int c = a->info.dim[a_nd - 1];
213 |      2 |   // Comparing HW
214 |      2 |   int hw[CCV_NNC_MAX_DIM];
215 |      6 |   for (k = 0; k < CCV_NNC_MAX_DIM; k++)
216 |      4 |   {
217 |      4 |     assert(a->info.dim[k + 1 - a_offset] == b->info.dim[k + 2 - b_offset]);
218 |      4 |     hw[k] = a->info.dim[k + 1 - a_offset];
219 |      4 |   }
220 |      2 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
221 |      2 |   int i[CCV_NNC_MAX_DIM + 2];
222 |      2 |   float* ap = a->data.f32;
223 |      2 |   float* bp = b->data.f32;
224 |      2 |   // Non-optimal case, need to do skip copy.
225 |      4 |   for (i[0] = 0; i[0] < n; i[0]++)
226 |      2 |   {
227 |      6 |     for (i[3] = 0; i[3] < c; i[3]++)
228 |      4 |     {
229 |      4 |       float* apu = ap + i[3];
230 |     20 |       for (i[1] = 0; i[1] < hw[0]; i[1]++)
231 |     16 |       {
232 |     64 |         for (i[2] = 0; i[2] < hw[1]; i[2]++)
233 |     48 |           bp[i[2]] = apu[i[2] * ainc[3]];
234 |     16 |         apu += ainc[2] * ainc[3];
235 |     16 |         bp += binc[3];
236 |     16 |       }
237 |      4 |       bp += (binc[2] - hw[0]) * binc[3];
238 |      4 |     }
239 |      2 |     ap += ainc[1] * ainc[2] * ainc[3];
240 |      2 |     bp += (binc[1] - c) * binc[2] * binc[3];
241 |      2 |   }
242 |      2 | }
243 |        |
244 |        | static void _ccv_nnc_tensor_nchw_nhwc(const ccv_nnc_tensor_view_t* a, ccv_nnc_tensor_view_t* b)
245 |      2 | {
246 |      2 |   // Assuming this is float 32.
247 |      2 |   int ainc[CCV_NNC_MAX_DIM + 2];
248 |      2 |   int binc[CCV_NNC_MAX_DIM + 2];
249 |      2 |   int k;
250 |      2 |   // In case it is Toll-free bridged matrix object (NHWC format is possible).
251 |      2 |   const int a_nd = ccv_nnc_tensor_nd(a->info.dim);
252 |      2 |   const int b_nd = ccv_nnc_tensor_nd(b->info.dim);
253 |      2 |   const int a_offset = CCV_NNC_MAX_DIM + 2 - a_nd;
254 |      2 |   assert(a_offset == 0 || a_offset == 1);
255 |      2 |   const int b_offset = CCV_NNC_MAX_DIM + 2 - b_nd;
256 |      2 |   assert(b_offset == 0 || b_offset == 1);
257 |      2 |   ccv_nnc_tensor_view_get_inc(a, ainc);
258 |      2 |   ccv_nnc_tensor_view_get_inc(b, binc);
259 |      2 |   // Comparing N
260 |      2 |   assert((a_offset == 0 ? a->info.dim[0] : 1) == (b_offset == 0 ? b->info.dim[0] : 1));
261 |      2 |   const int n = (a_offset == 0 ? a->info.dim[0] : 1);
262 |      2 |   // Comparing C
263 |      2 |   assert(a->info.dim[1 - a_offset] == b->info.dim[b_nd - 1]);
264 |      2 |   const int c = a->info.dim[1 - a_offset];
265 |      2 |   // Comparing HW
266 |      2 |   int hw[CCV_NNC_MAX_DIM];
267 |      6 |   for (k = 0; k < CCV_NNC_MAX_DIM; k++)
268 |      4 |   {
269 |      4 |     assert(a->info.dim[k + 2 - a_offset] == b->info.dim[k + 1 - b_offset]);
270 |      4 |     hw[k] = a->info.dim[k + 2 - a_offset];
271 |      4 |   }
272 |      2 |   assert(CCV_NNC_MAX_DIM == 2); // Need to change this logic for CCV_NNC_MAX_DIM == other number.
273 |      2 |   int i[CCV_NNC_MAX_DIM + 2];
274 |      2 |   float* ap = a->data.f32;
275 |      2 |   float* bp = b->data.f32;
276 |      2 |   // Non-optimal case, need to do skip copy.
277 |      4 |   for (i[0] = 0; i[0] < n; i[0]++)
278 |      2 |   {
279 |      6 |     for (i[3] = 0; i[3] < c; i[3]++)
280 |      4 |     {
281 |      4 |       float* bpu = bp + i[3];
282 |     20 |       for (i[1] = 0; i[1] < hw[0]; i[1]++)
283 |     16 |       {
284 |     64 |         for (i[2] = 0; i[2] < hw[1]; i[2]++)
285 |     48 |           bpu[i[2] * binc[3]] = ap[i[2]];
286 |     16 |         ap += ainc[3];
287 |     16 |         bpu += binc[2] * binc[3];
288 |     16 |       }
289 |      4 |       ap += (ainc[2] - hw[0]) * ainc[3];
290 |      4 |     }
291 |      2 |     ap += (ainc[1] - c) * ainc[2] * ainc[3];
292 |      2 |     bp += binc[1] * binc[2] * binc[3];
293 |      2 |   }
294 |      2 | }
295 |        |
296 |        | static int _ccv_nnc_format_transform(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
297 |      4 | {
298 |      4 |   assert(output_size <= input_size);
299 |      4 |   int i;
300 |      8 |   for (i = 0; i < output_size; i++)
301 |      4 |   {
302 |      4 |     const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[i];
303 |      4 |     ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[i];
304 |      4 |     assert(a != b); // Cannot do inplace transform.
305 |      4 |     if (a->info.format == b->info.format) {
306 |      0 |       // If it is the same, just do a normal data transfer.
307 |      0 |       _ccv_nnc_tensor_transfer_cpu_ref(a, b);
308 |      4 |     } else if (a->info.format == CCV_TENSOR_FORMAT_NHWC && b->info.format == CCV_TENSOR_FORMAT_NCHW) {
309 |      2 |       _ccv_nnc_tensor_nhwc_nchw(a, b);
310 |      2 |     } else if (a->info.format == CCV_TENSOR_FORMAT_NHWC && b->info.format == CCV_TENSOR_FORMAT_CHWN) {
311 |      2 |     } else if (a->info.format == CCV_TENSOR_FORMAT_NCHW && b->info.format == CCV_TENSOR_FORMAT_NHWC) {
312 |      2 |       _ccv_nnc_tensor_nchw_nhwc(a, b);
313 |      2 |     } else if (a->info.format == CCV_TENSOR_FORMAT_NCHW && b->info.format == CCV_TENSOR_FORMAT_CHWN) {
314 |      0 |     } else if (a->info.format == CCV_TENSOR_FORMAT_CHWN && b->info.format == CCV_TENSOR_FORMAT_NHWC) {
315 |      0 |     } else if (a->info.format == CCV_TENSOR_FORMAT_CHWN && b->info.format == CCV_TENSOR_FORMAT_NCHW) {
316 |      0 |     }
317 |      4 |   }
318 |      4 |   return CCV_NNC_EXEC_SUCCESS;
319 |      4 | }
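_ccv_nnc_format_transform dispatches purely on the (input format, output format) pair. Only the NHWC<->NCHW conversions have implementations here; every combination involving CCV_TENSOR_FORMAT_CHWN (lines 310 and 313-315) has an empty body, which matches the counts above. A sketch of the covered NHWC -> NCHW path follows, under the same nnc/ccv_nnc_easy.h macro assumptions as the earlier sketches; the 1x2x2x3 shape is illustrative.

#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

static void format_transform_sketch(void)
{
	// NHWC input: dims are {N, H, W, C}.
	ccv_nnc_tensor_t* const nhwc = ccv_nnc_tensor_new(0, (ccv_nnc_tensor_param_t){
		.type = CCV_TENSOR_CPU_MEMORY,
		.format = CCV_TENSOR_FORMAT_NHWC,
		.datatype = CCV_32F,
		.dim = {1, 2, 2, 3},
	}, 0);
	// NCHW output: same data, dims reordered to {N, C, H, W}.
	ccv_nnc_tensor_t* const nchw = ccv_nnc_tensor_new(0, (ccv_nnc_tensor_param_t){
		.type = CCV_TENSOR_CPU_MEMORY,
		.format = CCV_TENSOR_FORMAT_NCHW,
		.datatype = CCV_32F,
		.dim = {1, 3, 2, 2},
	}, 0);
	// Exercises _ccv_nnc_tensor_nhwc_nchw (report line 309).
	ccv_nnc_cmd_exec(CMD_FORMAT_TRANSFORM_FORWARD(), ccv_nnc_no_hint, 0,
		TENSOR_LIST(nhwc), TENSOR_LIST(nchw), 0);
	ccv_nnc_tensor_free(nhwc);
	ccv_nnc_tensor_free(nchw);
}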
320 |        |
321 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_FORMAT_TRANSFORM_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
322 |      1 | {
323 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
324 |      1 |   registry->tensor_datatypes = CCV_32F;
325 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
326 |      1 |   registry->algorithms = 1;
327 |      1 |   registry->exec = _ccv_nnc_format_transform;
328 |      1 | }
329 |        |
330 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_FORMAT_TRANSFORM_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
331 |      1 | {
332 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
333 |      1 |   registry->tensor_datatypes = CCV_32F;
334 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
335 |      1 |   registry->algorithms = 1;
336 |      1 |   registry->exec = _ccv_nnc_format_transform;
337 |      1 | }
338 |        |
339 |        | static int _ccv_nnc_datatype_conversion(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
340 |  1.76M | {
341 |  1.76M |   assert(output_size <= input_size);
342 |  1.76M |   int i;
343 |  3.52M |   for (i = 0; i < output_size; i++)
344 |  1.76M |   {
345 |  1.76M |     const ccv_nnc_tensor_view_t* a = (ccv_nnc_tensor_view_t*)inputs[i];
346 |  1.76M |     ccv_nnc_tensor_view_t* b = (ccv_nnc_tensor_view_t*)outputs[i];
347 |  1.76M |     assert(a != b); // Cannot do inplace transform.
348 |  1.76M |     assert(a->info.format == b->info.format);
349 |  1.76M |     if (a->info.datatype == b->info.datatype) {
350 |      0 |       // If it is the same, just do a normal data transfer.
351 |      0 |       _ccv_nnc_tensor_transfer_cpu_ref(a, b);
352 |  1.76M |     } else if (a->info.datatype == CCV_32F && b->info.datatype == CCV_16F) {
353 |  1.76M |       assert(!CCV_IS_TENSOR_VIEW(a));
354 |  1.76M |       assert(!CCV_IS_TENSOR_VIEW(b));
355 |  1.76M |       const size_t tensor_count = ccv_nnc_tensor_count(a->info);
356 |  1.76M |       assert(tensor_count == ccv_nnc_tensor_count(b->info));
357 |  1.76M |       ccv_float_to_half_precision(a->data.f32, (uint16_t*)b->data.f16, tensor_count);
358 |  1.76M |     } else if (a->info.datatype == CCV_16F && b->info.datatype == CCV_32F) {
359 |     82 |       assert(!CCV_IS_TENSOR_VIEW(a));
360 |     82 |       assert(!CCV_IS_TENSOR_VIEW(b));
361 |     82 |       const int tensor_count = ccv_nnc_tensor_count(a->info);
362 |     82 |       assert(tensor_count == ccv_nnc_tensor_count(b->info));
363 |     82 |       ccv_half_precision_to_float((uint16_t*)a->data.f16, b->data.f32, tensor_count);
364 |     82 |     }
365 |  1.76M |   }
366 |  1.76M |   return CCV_NNC_EXEC_SUCCESS;
367 |  1.76M | }
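The datatype conversion backend only converts between FP32 and FP16 (via ccv_float_to_half_precision and ccv_half_precision_to_float) and asserts that both tensors are plain, non-view tensors; the FP32 -> FP16 direction dominates this run at 1.76M executions versus 82 for the reverse. A sketch of the dominant direction, under the same nnc/ccv_nnc_easy.h macro assumptions; the 16-element shape is arbitrary.

#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

static void datatype_conversion_sketch(void)
{
	ccv_nnc_tensor_t* const f32 = ccv_nnc_tensor_new(0, (ccv_nnc_tensor_param_t){
		.type = CCV_TENSOR_CPU_MEMORY,
		.format = CCV_TENSOR_FORMAT_NHWC,
		.datatype = CCV_32F,
		.dim = {16},
	}, 0);
	ccv_nnc_tensor_t* const f16 = ccv_nnc_tensor_new(0, (ccv_nnc_tensor_param_t){
		.type = CCV_TENSOR_CPU_MEMORY,
		.format = CCV_TENSOR_FORMAT_NHWC,
		.datatype = CCV_16F,
		.dim = {16},
	}, 0);
	// FP32 -> FP16: the heavily exercised branch at report lines 352-357.
	ccv_nnc_cmd_exec(CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0,
		TENSOR_LIST(f32), TENSOR_LIST(f16), 0);
	ccv_nnc_tensor_free(f32);
	ccv_nnc_tensor_free(f16);
}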
368 |        |
369 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_DATATYPE_CONVERSION_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
370 |      1 | {
371 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
372 |      1 |   registry->tensor_datatypes = CCV_32F | CCV_16F;
373 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
374 |      1 |   registry->algorithms = 1;
375 |      1 |   registry->exec = _ccv_nnc_datatype_conversion;
376 |      1 | }
377 |        |
378 |        | REGISTER_COMMAND_BACKEND(CCV_NNC_DATATYPE_CONVERSION_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
379 |      1 | {
380 |      1 |   registry->tensor_formats = CCV_TENSOR_FORMAT_NCHW | CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_CHWN;
381 |      1 |   registry->tensor_datatypes = CCV_32F | CCV_16F;
382 |      1 |   registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
383 |      1 |   registry->algorithms = 1;
384 |      1 |   registry->exec = _ccv_nnc_datatype_conversion;
385 |      1 | }