Coverage Report

Created: 2025-05-31 15:19

/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_nnc_tensor.c
Line| Count|Source
   1|      |#include "ccv_nnc.h"
   2|      |#include "ccv_nnc_easy.h"
   3|      |#include "ccv_nnc_internal.h"
   4|      |#ifdef HAVE_CUDA
   5|      |#include "gpu/ccv_nnc_compat.h"
   6|      |#elif defined(HAVE_MPS)
   7|      |#include "mps/ccv_nnc_mps.h"
   8|      |#endif
   9|      |#include <fcntl.h>
  10|      |#include <sys/mman.h>
  11|      |
  12|      |// MARK - Level-1 API
  13|      |
  14|      |const int ccv_nnc_no_ofs[CCV_NNC_MAX_DIM_ALLOC] = {0};
  15|      |
  16|      |ccv_nnc_tensor_t* ccv_nnc_tensor_new(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
  17| 49.2k|{
  18| 49.2k|  ccv_nnc_tensor_t* tensor;
  19|      |  // this specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (channels, rows, cols), and channels no larger than the max channels of ccv_dense_matrix_t).
  20| 49.2k|  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
  21| 49.2k|  if (ptr || (flags & CCV_NO_DATA_ALLOC))
  22| 1.78k|  {
  23| 1.78k|    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
  24| 1.78k|    tensor->dataof = 0;
  25| 1.78k|    tensor->alias_ref = 0;
  26| 1.78k|    tensor->sig = 0;
  27| 1.78k|    tensor->refcount = 1;
  28| 1.78k|    tensor->info = params;
  29| 1.78k|    if (tfb)
  30|    59|    {
  31|    59|      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
  32|      |      // This corresponds to mat->step
  33|    59|      tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
  34|    59|    } else // This won't be recognized by ccv_dense_matrix_t
  35| 1.72k|      tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
  36| 1.78k|    tensor->data.u8 = (uint8_t*)ptr;
  37| 1.78k|    return tensor;
  38| 1.78k|  }
  39| 47.4k|  if (flags & CCV_TENSOR_CPU_MEMORY)
  40|     0|  {
  41|     0|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  42| 47.4k|  } else if (flags & CCV_TENSOR_GPU_MEMORY) {
  43|     0|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
  44|     0|  }
  45| 47.4k|  const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
  46| 47.4k|  const size_t size = ccv_nnc_tensor_data_size(params);
  47| 47.4k|#ifdef HAVE_CUDA
  48| 47.4k|  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  49| 2.60k|  {
  50| 2.60k|    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
  51| 2.60k|    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
  52| 2.60k|    if (size > 0)
  53| 2.60k|      tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
  54|     0|    else
  55|     0|      tensor->data.u8 = 0;
  56| 44.8k|  } else {
  57| 44.8k|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  58| 44.8k|    ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
  59| 44.8k|    if (size > 0)
  60| 44.8k|      tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
  61|     0|    else
  62|     0|      tensor->data.u8 = 0;
  63| 44.8k|  }
  64|      |#elif defined(HAVE_MPS)
  65|      |  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
  66|      |  {
  67|      |    tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
  68|      |    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
  69|      |    if (size > 0)
  70|      |      tensor->data.u8 = (uint8_t*)mpobjmalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
  71|      |    else
  72|      |      tensor->data.u8 = 0;
  73|      |  } else {
  74|      |    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  75|      |    ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
  76|      |    if (size > 0)
  77|      |      tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
  78|      |    else
  79|      |      tensor->data.u8 = 0;
  80|      |  }
  81|      |#else
  82|      |  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
  83|      |  ccmemalign((void **)&tensor, 64, tensor_hdr_size + size);
  84|      |  if (size > 0)
  85|      |    tensor->data.u8 = (uint8_t*)tensor + tensor_hdr_size;
  86|      |  else
  87|      |    tensor->data.u8 = 0;
  88|      |#endif
  89| 47.4k|  tensor->dataof = 0;
  90| 47.4k|  tensor->alias_ref = 0;
  91| 47.4k|  tensor->data_size = size;
  92| 47.4k|  tensor->sig = 0;
  93| 47.4k|  tensor->refcount = 1;
  94| 47.4k|  tensor->info = params;
  95| 47.4k|  if (tfb)
  96| 4.40k|  {
  97| 4.40k|    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
  98|      |    // This corresponds to mat->step
  99| 4.40k|    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
 100| 4.40k|  } else
 101| 43.0k|    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
 102| 47.4k|  return tensor;
 103| 47.4k|}
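
A note on the allocation pattern above: (sizeof(ccv_nnc_tensor_t) + 63) & -64 rounds the header size up to the next multiple of 64, so when the header and payload share one aligned allocation (the ccmemalign branch), the payload starts on a cache-line boundary. Below is a minimal standalone sketch of the same trick, with blob_t as an illustrative stand-in rather than the ccv struct:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
  size_t size;
  uint8_t* data; // points into the same allocation, right after the padded header
} blob_t;

int main(void)
{
  // -64 in two's complement is ...11000000, so `& -64` clears the low six
  // bits; adding 63 first makes this round up to the next multiple of 64.
  const size_t hdr_size = (sizeof(blob_t) + 63) & -64;
  // One 64-byte-aligned allocation holds the padded header plus the payload.
  blob_t* const blob = aligned_alloc(64, hdr_size + 256);
  if (!blob)
    return 1;
  blob->size = 256;
  blob->data = (uint8_t*)blob + hdr_size;
  printf("header %zu bytes, padded to %zu, payload at offset %zu\n",
    sizeof(blob_t), hdr_size, (size_t)(blob->data - (uint8_t*)blob));
  free(blob);
  return 0;
}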
 104|      |
 105|      |ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_file(const ccv_nnc_tensor_param_t params, const char* const filename, const off_t offset, const int flags)
 106|     4|{
 107|     4|  ccv_nnc_tensor_t* tensor;
 108|      |  // this specific form can be toll-free bridged to ccv_dense_matrix_t (on CPU, 3 dims (channels, rows, cols), and channels no larger than the max channels of ccv_dense_matrix_t).
 109|     4|  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
 110|     4|  tensor = (ccv_nnc_tensor_t*)ccmalloc(sizeof(ccv_nnc_tensor_t));
 111|     4|  tensor->dataof = 0;
 112|     4|  tensor->alias_ref = 0;
 113|     4|  tensor->sig = 0;
 114|     4|  tensor->refcount = 1;
 115|     4|  tensor->info = params;
 116|     4|  if (tfb)
 117|     0|  {
 118|     0|    tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
 119|      |    // This corresponds to mat->step
 120|     0|    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
 121|     0|  } else // This won't be recognized by ccv_dense_matrix_t
 122|     4|    tensor->type = CCV_NO_DATA_ALLOC | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
 123|     4|  const size_t size = ccv_nnc_tensor_data_size(params);
 124|     4|#ifdef HAVE_CUDA
 125|     4|  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 126|     2|  {
 127|      |    // Remove this flag so it can be deallocated as usual.
 128|     2|    tensor->type &= ~CCV_NO_DATA_ALLOC;
 129|     2|    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 130|     2|    if (size > 0)
 131|     2|    {
 132|     2|      void* ptr = 0;
 133|      |      // This is not supported yet on CUDA.
 134|     2|      if (flags & CCV_NNC_TENSOR_MEMORY_MAP_ON_DEMAND)
 135|     0|        ptr = cumallocmanaged(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
 136|     2|      if (ptr) // If allocated successfully. Otherwise we go through the fallback path.
 137|     0|      {
 138|     0|        tensor->data.u8 = (uint8_t*)ptr;
 139|     0|        int fd = open(filename, O_RDONLY, 0);
 140|     0|        cufileread(fd, offset, tensor->data.u8, size);
 141|     0|        close(fd);
 142|     0|        cumemadvisereadmostly(CCV_TENSOR_GET_DEVICE_ID(params.type), tensor->data.u8, size);
 143|     0|        tensor->type |= CCV_MAPPED_MEM; // This denotes the tensor is mapped to CPU, and would prefer an explicit prefetch call.
 144|     2|      } else {
 145|     2|        tensor->data.u8 = (uint8_t*)cumalloc(CCV_TENSOR_GET_DEVICE_ID(params.type), size);
 146|     2|        int fd = open(filename, O_RDONLY, 0);
 147|     2|        cufileread(fd, offset, tensor->data.u8, size);
 148|     2|        close(fd);
 149|     2|      }
 150|     2|    } else
 151|     0|      tensor->data.u8 = 0;
 152|     2|  } else {
 153|     2|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 154|     2|    if (size > 0)
 155|     2|    {
 156|     2|      int fd = open(filename, O_RDONLY, 0);
 157|     2|      void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
 158|     2|      close(fd);
 159|     2|      madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
 160|     2|      tensor->data.u8 = bufptr;
 161|     2|      tensor->type |= CCV_MAPPED_MEM;
 162|     2|    } else
 163|     0|      tensor->data.u8 = 0;
 164|     2|  }
 165|      |#elif defined(HAVE_MPS)
 166|      |  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 167|      |  {
 168|      |    // Remove this flag so it can be deallocated as usual.
 169|      |    tensor->type &= ~CCV_NO_DATA_ALLOC;
 170|      |    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 171|      |    if (size > 0)
 172|      |      tensor->data.u8 = (uint8_t*)mpmemmap(filename, size, offset, flags);
 173|      |    else
 174|      |      tensor->data.u8 = 0;
 175|      |  } else {
 176|      |    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 177|      |    if (size > 0)
 178|      |    {
 179|      |      int fd = open(filename, O_RDONLY, 0);
 180|      |      void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
 181|      |      close(fd);
 182|      |      madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
 183|      |      tensor->data.u8 = bufptr;
 184|      |      tensor->type |= CCV_MAPPED_MEM;
 185|      |    } else
 186|      |      tensor->data.u8 = 0;
 187|      |  }
 188|      |#else
 189|      |  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 190|      |  if (size > 0)
 191|      |  {
 192|      |    int fd = open(filename, O_RDONLY, 0);
 193|      |    void* bufptr = mmap(0, size, PROT_READ, MAP_PRIVATE, fd, offset);
 194|      |    close(fd);
 195|      |    madvise(bufptr, size, MADV_SEQUENTIAL | MADV_WILLNEED);
 196|      |    tensor->data.u8 = bufptr;
 197|      |    tensor->type |= CCV_MAPPED_MEM;
 198|      |  } else
 199|      |    tensor->data.u8 = 0;
 200|      |#endif
 201|     4|  return tensor;
 202|     4|}
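
The CPU branch above maps the file read-only instead of reading it, then hints the kernel with madvise; the file descriptor can be closed as soon as the mapping exists. A standalone sketch of that open/mmap/close/madvise sequence (note that mmap requires a page-aligned offset, so this sketch maps from the start of the file):

#include <fcntl.h>
#include <stdio.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

int main(int argc, char** argv)
{
  if (argc < 2)
    return 1;
  const int fd = open(argv[1], O_RDONLY, 0);
  if (fd < 0)
    return 1;
  struct stat st;
  fstat(fd, &st);
  // Map the whole file read-only; MAP_PRIVATE because we never write back.
  void* buf = mmap(0, st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
  close(fd); // the mapping stays valid after the fd is closed
  if (buf == MAP_FAILED)
    return 1;
  // Tell the kernel we will scan this range sequentially and soon.
  madvise(buf, st.st_size, MADV_SEQUENTIAL | MADV_WILLNEED);
  printf("first byte: %d\n", ((unsigned char*)buf)[0]);
  munmap(buf, st.st_size);
  return 0;
}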
 203|      |
 204|      |ccv_nnc_tensor_t* ccv_nnc_tensor_new_from_raw(const ccv_nnc_tensor_param_t params, const void* const bufptr, const size_t buf_size, const int flags)
 205|     0|{
 206|     0|  ccv_nnc_tensor_t* tensor = ccv_nnc_tensor_new(0, params, flags);
 207|     0|  const size_t size = ccv_min(ccv_nnc_tensor_data_size_without_padding(params), buf_size);
 208|     0|#ifdef HAVE_CUDA
 209|     0|  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 210|     0|  {
 211|      |    // Remove this flag so it can be deallocated as usual.
 212|     0|    tensor->type &= ~CCV_NO_DATA_ALLOC;
 213|     0|    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 214|     0|    if (size > 0)
 215|     0|      cumemcpy(tensor->data.u8, tensor->info.type, bufptr, CCV_TENSOR_CPU_MEMORY, size);
 216|     0|    else
 217|     0|      tensor->data.u8 = 0;
 218|     0|  } else {
 219|     0|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 220|     0|    if (size > 0)
 221|     0|      memcpy(tensor->data.u8, bufptr, size);
 222|     0|    else
 223|     0|      tensor->data.u8 = 0;
 224|     0|  }
 225|      |#elif defined(HAVE_MPS)
 226|      |  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 227|      |  {
 228|      |    // Remove this flag so it can be deallocated as usual.
 229|      |    tensor->type &= ~CCV_NO_DATA_ALLOC;
 230|      |    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 231|      |    if (size > 0)
 232|      |      mpmemcpy(tensor->data.u8, tensor->dataof, tensor->info.type, bufptr, 0, CCV_TENSOR_CPU_MEMORY, size);
 233|      |    else
 234|      |      tensor->data.u8 = 0;
 235|      |  } else {
 236|      |    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 237|      |    if (size > 0)
 238|      |      memcpy(tensor->data.u8, bufptr, size);
 239|      |    else
 240|      |      tensor->data.u8 = 0;
 241|      |  }
 242|      |#else
 243|      |  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 244|      |  if (size > 0)
 245|      |    memcpy(tensor->data.u8, bufptr, size);
 246|      |  else
 247|      |    tensor->data.u8 = 0;
 248|      |#endif
 249|     0|  return tensor;
 250|     0|}
 251|      |
 252|      |ccv_nnc_tensor_t* ccv_nnc_tensor_resize(ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params)
 253| 1.27k|{
 254| 1.27k|  assert(!CCV_IS_TENSOR_VIEW(tensor));
 255| 1.27k|  assert(tensor->type & CCV_UNMANAGED);
 256| 1.27k|  assert(tensor->data_size > 0);
 257| 1.27k|  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GET_MEMORY(tensor->info.type));
 258| 1.27k|  assert(CCV_TENSOR_GET_DEVICE(params.type) == CCV_TENSOR_GET_DEVICE(tensor->info.type));
 259| 1.27k|  const size_t size = ccv_nnc_tensor_data_size(params);
 260| 1.27k|  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
 261| 1.27k|  tensor->info = params;
 262| 1.27k|#ifdef HAVE_CUDA
 263| 1.27k|  const int pinned_mem = (tensor->type & CCV_PINNED_MEM);
 264| 1.27k|#endif
 265| 1.27k|  if (tfb)
 266|    10|  {
 267|    10|    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
 268|      |    // This corresponds to mat->step
 269|    10|    tensor->info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
 270|    10|  } else
 271| 1.26k|    tensor->type = CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
 272| 1.27k|  if (size <= tensor->data_size) // Nothing to do.
 273| 1.27k|  {
 274| 1.27k|#ifdef HAVE_CUDA
 275| 1.27k|    if (pinned_mem)
 276|     4|      tensor->type |= CCV_PINNED_MEM;
 277| 1.27k|#endif
 278| 1.27k|    return tensor;
 279| 1.27k|  }
 280|     1|  ccv_nnc_tensor_t* new_tensor = tensor;
 281|     1|  const size_t tensor_hdr_size = (sizeof(ccv_nnc_tensor_t) + 63) & -64;
 282|     1|#ifdef HAVE_CUDA
 283|     1|  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 284|     0|  {
 285|     0|    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 286|     0|    const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
 287|     0|    assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
 288|     0|    cufree(device_id, tensor->data.u8);
 289|     0|    new_tensor->data.u8 = (uint8_t*)cumalloc(device_id, size);
 290|     1|  } else {
 291|     1|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 292|     1|    assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
 293|      |    // Pin memory again below.
 294|     1|    if (pinned_mem)
 295|     0|      cuunregister(new_tensor->data.u8);
 296|     1|    new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
 297|     1|    new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
 298|     1|  }
 299|      |#elif defined(HAVE_MPS)
 300|      |  if (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY)
 301|      |  {
 302|      |    assert(CCV_TENSOR_GET_DEVICE(params.type) != CCV_COMPUTE_DEVICE_ANY);
 303|      |    const int device_id = CCV_TENSOR_GET_DEVICE_ID(params.type);
 304|      |    assert(device_id == CCV_TENSOR_GET_DEVICE_ID(tensor->info.type));
 305|      |    mpobjfree(device_id, tensor->data.u8);
 306|      |    new_tensor->data.u8 = (uint8_t*)mpobjmalloc(device_id, size);
 307|      |  } else {
 308|      |    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 309|      |    assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
 310|      |    new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
 311|      |    new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
 312|      |  }
 313|      |#else
 314|      |  assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 315|      |  new_tensor = ccrealloc(new_tensor, tensor_hdr_size + size);
 316|      |  new_tensor->data.u8 = (uint8_t*)new_tensor + tensor_hdr_size;
 317|      |#endif
 318|     1|  new_tensor->data_size = size;
 319|     1|#ifdef HAVE_CUDA
 320|     1|  if (pinned_mem)
 321|     0|    ccv_nnc_tensor_pin_memory(new_tensor);
 322|     1|#endif
 323|     1|  return new_tensor;
 324|     1|}
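
The CPU resize path grows the header+payload co-allocation with ccrealloc, which may move the block; the interior data pointer must then be re-derived from the new base address. A standalone sketch of that pattern, with illustrative names (blob_t, blob_grow) that are not part of the ccv API:

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

typedef struct {
  size_t size;
  uint8_t* data; // interior pointer into the same allocation
} blob_t;

static blob_t* blob_grow(blob_t* blob, size_t new_size)
{
  const size_t hdr_size = (sizeof(blob_t) + 63) & -64;
  if (new_size <= blob->size)
    return blob; // current allocation is big enough, reuse it
  // Note: realloc does not promise to keep any alignment beyond malloc's.
  blob_t* const grown = realloc(blob, hdr_size + new_size);
  if (!grown)
    return blob;
  grown->data = (uint8_t*)grown + hdr_size; // re-derive, the block may have moved
  grown->size = new_size;
  return grown;
}

int main(void)
{
  const size_t hdr_size = (sizeof(blob_t) + 63) & -64;
  blob_t* blob = malloc(hdr_size + 16);
  blob->size = 16;
  blob->data = (uint8_t*)blob + hdr_size;
  blob = blob_grow(blob, 1024);
  printf("grown to %zu bytes\n", blob->size);
  free(blob);
  return 0;
}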
 325|      |
 326|      |ccv_nnc_tensor_t ccv_nnc_tensor(const void* const ptr, const ccv_nnc_tensor_param_t params, const int flags)
 327| 78.2k|{
 328|      |  // this specific form can be toll-free bridged to ccv_dense_matrix_t
 329| 78.2k|  const int tfb = (CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY && params.format == CCV_TENSOR_FORMAT_NHWC && params.dim[2] > 0 && params.dim[2] <= CCV_MAX_CHANNEL && params.dim[0] > 0 && params.dim[1] > 0 && params.dim[3] == 0);
 330| 78.2k|  ccv_nnc_tensor_t tensor;
 331| 78.2k|  tensor.dataof = 0;
 332| 78.2k|  tensor.alias_ref = 0;
 333| 78.2k|  tensor.sig = 0;
 334| 78.2k|  tensor.refcount = 1;
 335| 78.2k|  tensor.info = params;
 336| 78.2k|  if (flags & CCV_TENSOR_CPU_MEMORY)
 337|     0|  {
 338|     0|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY);
 339| 78.2k|  } else if (flags & CCV_TENSOR_GPU_MEMORY) {
 340|     0|    assert(CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_GPU_MEMORY);
 341|     0|  }
 342| 78.2k|  if (tfb)
 343|   196|  {
 344|   196|    tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype) | params.dim[2];
 345|      |    // This corresponds to mat->step
 346|   196|    tensor.info.dim[4] = CCV_GET_STEP(params.dim[1], (CCV_GET_DATA_TYPE(params.datatype) | params.dim[2]));
 347|   196|  } else // This won't be recognized by ccv_dense_matrix_t
 348| 78.0k|    tensor.type = CCV_NO_DATA_ALLOC | CCV_UNMANAGED | CCV_MATRIX_DENSE | CCV_GET_DATA_TYPE(params.datatype);
 349| 78.2k|  if (params.dim[0] > 0)
 350| 78.2k|    tensor.data.u8 = (uint8_t*)ptr;
 351|     0|  else
 352|     0|    tensor.data.u8 = 0;
 353| 78.2k|  tensor.data_size = 0;
 354| 78.2k|  return tensor;
 355| 78.2k|}
 356|      |
 357|      |int ccv_nnc_tensor_pin_memory(ccv_nnc_tensor_t* const tensor)
 358| 1.40k|{
 359| 1.40k|#ifdef HAVE_CUDA
 360| 1.40k|  assert(CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY);
 361| 1.40k|  if (!(tensor->type & CCV_PINNED_MEM) && tensor->data_size)
 362|   146|  {
 363|   146|    const int success = curegister(tensor->data.u8, tensor->data_size);
 364|   146|    if (success)
 365|   146|      tensor->type |= CCV_PINNED_MEM;
 366|   146|    return success ? 0 : -1;
 367|   146|  }
 368| 1.25k|#endif
 369| 1.25k|  return 0;
 370| 1.40k|}
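
curegister/cuunregister are ccv's CUDA compatibility wrappers (their exact definitions are not shown here); the underlying technique is host-memory registration, which page-locks an existing allocation so device transfers can use DMA. A sketch using the CUDA runtime API directly, which requires the CUDA toolkit:

#include <cuda_runtime.h>
#include <stdio.h>
#include <stdlib.h>

int main(void)
{
  const size_t size = 1 << 20;
  // Page-aligned allocation; some CUDA versions expect registered ranges
  // to be page-aligned.
  void* buf = aligned_alloc(4096, size);
  // Pin (page-lock) the buffer so async copies to/from the GPU can DMA it.
  if (cudaHostRegister(buf, size, cudaHostRegisterDefault) != cudaSuccess) {
    fprintf(stderr, "cudaHostRegister failed\n");
    free(buf);
    return 1;
  }
  // ... issue cudaMemcpyAsync against buf here ...
  cudaHostUnregister(buf); // must unregister before freeing
  free(buf);
  return 0;
}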
 371|      |
 372|      |void ccv_nnc_tensor_free(ccv_nnc_tensor_t* const tensor)
 373| 49.1k|{
 374| 49.1k|  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_CPU_MEMORY && (tensor->type & CCV_MAPPED_MEM))
 375|     2|  {
 376|      |    // The size might be different from the one at allocation time (for example, the tensor might rewrite its size to be smaller).
 377|      |    // This might cause issues in the future.
 378|     2|    const size_t size = ccv_nnc_tensor_data_size(tensor->info);
 379|     2|    munmap(tensor->data.u8, size);
 380|     2|  }
 381| 49.1k|#ifdef HAVE_CUDA
 382| 49.1k|  if (tensor->type & CCV_PINNED_MEM)
 383|   146|    cuunregister(tensor->data.u8);
 384| 49.1k|  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
 385| 49.1k|    !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
 386| 2.60k|    cufree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
 387|      |#elif defined(HAVE_MPS)
 388|      |  if (CCV_TENSOR_GET_MEMORY(tensor->info.type) == CCV_TENSOR_GPU_MEMORY &&
 389|      |    !(tensor->type & CCV_NO_DATA_ALLOC)) // If this is GPU memory and it is allocated, free.
 390|      |    mpobjfree(CCV_TENSOR_GET_DEVICE_ID(tensor->info.type), tensor->data.u8);
 391|      |#endif
 392| 49.1k|  ccfree(tensor);
 393| 49.1k|}
 394|      |
 395|      |static inline void _ccv_nnc_tensor_view_set(ccv_nnc_tensor_view_t* const tv, const ccv_nnc_tensor_t* const tensor, const int dim[CCV_NNC_MAX_DIM_ALLOC], const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
 396|   177|{
 397|   177|  memcpy(tv->stride, stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
 398|   177|  memcpy(tv->info.dim, dim, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
 399|   177|  uint8_t* const p = tensor->data.u8;
 400|   177|  const off_t off = tv->off = ccv_nnc_tensor_view_offset(tv->info.datatype, stride, ofs);
 401|   177|  tv->contiguous = ccv_nnc_tensor_view_is_contiguous(dim, stride);
 402|   177|  assert(off + CCV_GET_DATA_TYPE_SIZE(tv->info.datatype) * ccv_nnc_dimension_upper_bound(tv->info.dim, tv->stride) <= CCV_GET_DATA_TYPE_SIZE(tensor->info.datatype) * ccv_nnc_tensor_count(tensor->info));
 403|   177|  ccv_nnc_tensor_data(tv->info, p, off + tensor->dataof, &tv->data, &tv->dataof);
 404|   177|}
 405|      |
 406|      |ccv_nnc_tensor_view_t* ccv_nnc_tensor_view_new(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
 407|    96|{
 408|    96|  ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)ccmalloc(sizeof(ccv_nnc_tensor_view_t));
 409|    96|  tv->type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW;
 410|    96|  tv->dataof = 0;
 411|    96|  tv->alias_ref = (uintptr_t)tensor;
 412|    96|  tv->refcount = 1;
 413|    96|  tv->sig = 0;
 414|    96|  tv->data_size = 0;
 415|    96|  assert(params.type == tensor->info.type);
 416|    96|  assert(params.datatype == tensor->info.datatype);
 417|    96|  tv->info = params;
 418|    96|  _ccv_nnc_tensor_view_set(tv, tensor, params.dim, ofs, stride);
 419|    96|  return tv;
 420|    96|}
 421|      |
 422|      |ccv_nnc_tensor_view_t ccv_nnc_tensor_view(const ccv_nnc_tensor_t* const tensor, const ccv_nnc_tensor_param_t params, const int ofs[CCV_NNC_MAX_DIM_ALLOC], const int stride[CCV_NNC_MAX_DIM_ALLOC])
 423|    81|{
 424|    81|  assert(!CCV_IS_TENSOR_VIEW(tensor));
 425|    81|  assert(params.type == tensor->info.type);
 426|    81|  assert(params.datatype == tensor->info.datatype);
 427|    81|  ccv_nnc_tensor_view_t tv = {
 428|    81|    .dataof = 0,
 429|    81|    .alias_ref = (uintptr_t)tensor,
 430|    81|    .type = (tensor->type & ~0xfff) | CCV_TENSOR_VIEW, // clean up the channel bits, and then add the CCV_TENSOR_VIEW identifier
 431|    81|    .refcount = 1,
 432|    81|    .sig = 0,
 433|    81|    .info = params,
 434|    81|    .data_size = 0,
 435|    81|  };
 436|    81|  _ccv_nnc_tensor_view_set(&tv, tensor, params.dim, ofs, stride);
 437|    81|  return tv;
 438|    81|}
 439|      |
 440|      |void ccv_nnc_tensor_view_free(ccv_nnc_tensor_view_t* const tensor_view)
 441|    96|{
 442|    96|  ccfree(tensor_view);
 443|    96|}
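
A view copies no data: it keeps the dim/stride arrays and a byte offset that ccv_nnc_tensor_view_offset derives from ofs and stride. A standalone sketch of that offset arithmetic (view_offset is an illustrative helper, not the library routine):

#include <stddef.h>
#include <stdio.h>

// Byte offset of a strided view's origin: sum over dimensions of
// ofs[i] * stride[i], scaled by the element size.
static size_t view_offset(const int* ofs, const int* stride, int nd, size_t elem_size)
{
  size_t off = 0;
  for (int i = 0; i < nd; i++)
    off += (size_t)ofs[i] * stride[i];
  return off * elem_size;
}

int main(void)
{
  // A 2x2 float view starting at row 1, col 2 of a 4x8 row-major matrix.
  const int ofs[2] = {1, 2}, stride[2] = {8, 1};
  printf("%zu\n", view_offset(ofs, stride, 2, sizeof(float))); // (1*8 + 2*1) * 4 = 40
  return 0;
}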
 444|      |
 445|      |void _ccv_nnc_tensor_set_zero(unsigned char* u8, const int nd, const int* const dim, const int* const stride, const size_t data_size)
 446|   107|{
 447|   107|  if (nd == 1)
 448|     0|  {
 449|     0|    if (stride[0] == 1)
 450|     0|    {
 451|     0|      memset(u8, 0, data_size * dim[0]);
 452|     0|      return;
 453|     0|    }
 454|     0|    int i;
 455|     0|    for (i = 0; i < dim[0]; i++)
 456|     0|      memset(u8 + i * stride[0] * data_size, 0, data_size);
 457|   107|  } else if (nd == 2) {
 458|     2|    if (stride[1] == 1 && stride[0] == dim[1])
 459|     0|    {
 460|     0|      memset(u8, 0, data_size * dim[1] * dim[0]);
 461|     0|      return;
 462|     0|    }
 463|     2|    int x, y;
 464|     8|    for (y = 0; y < dim[0]; y++)
 465|     6|    {
 466|     6|      unsigned char* const u8y = u8 + y * stride[0] * data_size;
 467|    18|      for (x = 0; x < dim[1]; x++)
 468|    12|        memset(u8y + x * stride[1] * data_size, 0, data_size);
 469|     6|    }
 470|   105|  } else if (nd == 3) {
 471|     0|    if (stride[2] == 1 && stride[1] == dim[2] && stride[0] == dim[1] * dim[2])
 472|     0|    {
 473|     0|      memset(u8, 0, data_size * dim[2] * dim[1] * dim[0]);
 474|     0|      return;
 475|     0|    }
 476|     0|    int x, y, z;
 477|     0|    for (z = 0; z < dim[0]; z++)
 478|     0|    {
 479|     0|      unsigned char* const u8z = u8 + z * stride[0] * data_size;
 480|     0|      for (y = 0; y < dim[1]; y++)
 481|     0|      {
 482|     0|        unsigned char* const u8y = u8z + y * stride[1] * data_size;
 483|     0|        for (x = 0; x < dim[2]; x++)
 484|     0|          memset(u8y + x * stride[2] * data_size, 0, data_size);
 485|     0|      }
 486|     0|    }
 487|   105|  } else if (nd == 4) {
 488|    96|    if (stride[3] == 1 && stride[2] == dim[3] && stride[1] == dim[2] * dim[3] && stride[0] == dim[1] * dim[2] * dim[3])
 489|     0|    {
 490|     0|      memset(u8, 0, data_size * dim[3] * dim[2] * dim[1] * dim[0]);
 491|     0|      return;
 492|     0|    }
 493|    96|    int x, y, z, s;
 494| 1.53k|    for (s = 0; s < dim[0]; s++)
 495| 1.44k|    {
 496| 1.44k|      unsigned char* const u8s = u8 + s * stride[0] * data_size;
 497| 4.32k|      for (z = 0; z < dim[1]; z++)
 498| 2.88k|      {
 499| 2.88k|        unsigned char* const u8z = u8s + z * stride[1] * data_size;
 500| 11.5k|        for (y = 0; y < dim[2]; y++)
 501| 8.64k|        {
 502| 8.64k|          unsigned char* const u8y = u8z + y * stride[2] * data_size;
 503| 43.2k|          for (x = 0; x < dim[3]; x++)
 504| 34.5k|            memset(u8y + x * stride[3] * data_size, 0, data_size);
 505| 8.64k|        }
 506| 2.88k|      }
 507| 1.44k|    }
 508|    96|  } else {
 509|     9|    int i;
 510|   113|    for (i = 0; i < dim[0]; i++)
 511|   104|      _ccv_nnc_tensor_set_zero(u8 + i * stride[0] * data_size, nd - 1, dim + 1, stride + 1, data_size);
 512|     9|  }
 513|   107|}
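
The recursive fallback above peels one outer dimension per call and only memsets contiguous runs. A small self-contained version of the same idea, zeroing a 2x3 sub-block of a row-major matrix (strided_zero is illustrative, not the library function):

#include <stdio.h>
#include <string.h>

// Recursively zero an nd-dimensional strided block; when the innermost
// dimension is contiguous (stride 1), memset the whole run at once.
static void strided_zero(unsigned char* u8, int nd, const int* dim, const int* stride, size_t esize)
{
  if (nd == 1) {
    if (stride[0] == 1) {
      memset(u8, 0, esize * dim[0]);
      return;
    }
    for (int i = 0; i < dim[0]; i++)
      memset(u8 + i * stride[0] * esize, 0, esize);
    return;
  }
  for (int i = 0; i < dim[0]; i++)
    strided_zero(u8 + i * stride[0] * esize, nd - 1, dim + 1, stride + 1, esize);
}

int main(void)
{
  float m[4][8];
  for (int i = 0; i < 4; i++)
    for (int j = 0; j < 8; j++)
      m[i][j] = 1.0f;
  // Zero the 2x3 block starting at (1, 2); strides are in elements.
  const int dim[2] = {2, 3}, stride[2] = {8, 1};
  strided_zero((unsigned char*)&m[1][2], 2, dim, stride, sizeof(float));
  printf("%g %g %g %g\n", m[1][1], m[1][2], m[1][4], m[2][4]); // 1 0 0 0
  return 0;
}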
 514|      |
 515|      |void ccv_nnc_tensor_zero(void* const tensor)
 516| 19.9k|{
 517| 19.9k|  ccv_nnc_tensor_view_t* tv = (ccv_nnc_tensor_view_t*)tensor;
 518| 19.9k|  const size_t data_size = CCV_GET_DATA_TYPE_SIZE(tv->info.datatype);
 519| 19.9k|  if (CCV_IS_TENSOR_CONTIGUOUS(tv))
 520| 19.9k|  {
 521| 19.9k|    memset(tv->data.u8, 0, data_size * ccv_nnc_tensor_count(tv->info));
 522| 19.9k|    return;
 523| 19.9k|  }
 524|     3|  const int nd = ccv_nnc_tensor_nd(tv->info.dim);
 525|     3|  assert(nd >= 1);
 526|     3|  const int* const tvstride = tv->stride;
 527|      |  // Go through this recursively.
 528|     3|  _ccv_nnc_tensor_set_zero(tv->data.u8, nd, tv->info.dim, tvstride, data_size);
 529|     3|}
 530|      |
 531|      |int ccv_nnc_tensor_eq(const ccv_nnc_tensor_t* const a, const ccv_nnc_tensor_t* const b)
 532|   842|{
 533|   842|  assert(!CCV_IS_TENSOR_VIEW(a));
 534|   842|  assert(!CCV_IS_TENSOR_VIEW(b));
 535|      |  // If a is a dense matrix, just use ccv_matrix_eq
 536|   842|  if (CCV_TENSOR_IS_DENSE_MATRIX(a->type))
 537|   130|    return ccv_matrix_eq((ccv_matrix_t*)a, (ccv_matrix_t*)b);
 538|      |  // Otherwise, do our own thing.
 539|   712|  if (CCV_GET_DATA_TYPE(a->type) != CCV_GET_DATA_TYPE(b->type))
 540|     0|    return -1;
 541|   712|  int i, c = 1;
 542| 2.02k|  for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC; i++)
 543| 2.02k|  {
 544| 2.02k|    if (!a->info.dim[i] && !b->info.dim[i])
 545|   712|      break;
 546| 1.31k|    if (a->info.dim[i] != b->info.dim[i])
 547|     0|      return -1;
 548| 1.31k|    c *= a->info.dim[i];
 549| 1.31k|  }
 550|   712|  if (CCV_GET_DATA_TYPE(a->type) == CCV_32S)
 551|    91|    return memcmp(a->data.i32, b->data.i32, sizeof(int) * c) == 0 ? 0 : -1;
 552|      |  // Only 32F and 64F are supported at this point.
 553|   712|  assert(CCV_GET_DATA_TYPE(a->type) == CCV_32F || CCV_GET_DATA_TYPE(a->type) == CCV_64F);
 554|      |  // Read: http://www.cygnus-software.com/papers/comparingfloats/comparingfloats.htm
 555|      |  // http://floating-point-gui.de/errors/comparison/
 556|   621|  if (CCV_GET_DATA_TYPE(a->type) == CCV_32F)
 557|   619|  {
 558|   619|    static const float epsi = FLT_EPSILON;
 559|   619|    static const int32_t ulps = 128; // so that 1 and 1.000015 will be treated as the same.
 560| 30.4M|    for (i = 0; i < c; i++)
 561| 30.4M|    {
 562|      |      // Although this is floating point, use the integer representation to compare.
 563| 30.4M|      int32_t i32a = a->data.i32[i];
 564| 30.4M|      if (i32a < 0)
 565| 4.82M|        i32a = 0x80000000 - i32a;
 566| 30.4M|      int32_t i32b = b->data.i32[i];
 567| 30.4M|      if (i32b < 0)
 568| 4.82M|        i32b = 0x80000000 - i32b;
 569| 30.4M|      if (abs(i32a - i32b) > ulps && fabsf(a->data.f32[i] - b->data.f32[i]) > epsi)
 570|     0|        return -1;
 571| 30.4M|    }
 572|   619|  } else if (CCV_GET_DATA_TYPE(a->type) == CCV_64F) {
 573|     2|    typedef union {
 574|     2|      double f64;
 575|     2|      int64_t i64;
 576|     2|    } Float64;
 577|     2|    static const double epsi = DBL_EPSILON;
 578|     2|    static const int64_t ulps = 128; // so that 1 and 1.000015 will be treated as the same.
 579| 15.8k|    for (i = 0; i < c; i++)
 580| 15.8k|    {
 581|      |      // Although this is floating point, use the integer representation to compare.
 582| 15.8k|      Float64 f64a, f64b;
 583| 15.8k|      f64a.f64 = a->data.f64[i];
 584| 15.8k|      f64b.f64 = b->data.f64[i];
 585| 15.8k|      if (f64a.i64 < 0)
 586|     0|        f64a.i64 = 0x8000000000000000 - f64a.i64;
 587| 15.8k|      if (f64b.i64 < 0)
 588|     0|        f64b.i64 = 0x8000000000000000 - f64b.i64;
 589| 15.8k|      if (llabs(f64a.i64 - f64b.i64) > ulps && fabs(a->data.f64[i] - b->data.f64[i]) > epsi)
 590|     0|        return -1;
 591| 15.8k|    }
 592|     2|  }
 593|   621|  return 0;
 594|   621|}
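
The float path compares bit patterns as integers: IEEE-754 floats of the same sign order the same way as their integer representations, and mirroring negatives around zero makes the whole number line monotonic, so "within N units in the last place" becomes a cheap integer test, with a small absolute epsilon as a backstop near zero. A standalone sketch (almost_equal is illustrative; it uses memcpy for the type pun rather than the union the library code relies on):

#include <float.h>
#include <math.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int almost_equal(float a, float b, int32_t ulps)
{
  int32_t ia, ib;
  memcpy(&ia, &a, sizeof(ia)); // well-defined type pun
  memcpy(&ib, &b, sizeof(ib));
  // Mirror negative values so the integer line is monotonic in the float value.
  if (ia < 0)
    ia = 0x80000000 - ia;
  if (ib < 0)
    ib = 0x80000000 - ib;
  return abs(ia - ib) <= ulps || fabsf(a - b) <= FLT_EPSILON;
}

int main(void)
{
  printf("%d\n", almost_equal(1.0f, 1.000001f, 128)); // 1: about 8 ULPs apart
  printf("%d\n", almost_equal(1.0f, 1.001f, 128));    // 0: thousands of ULPs apart
  return 0;
}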
 595|      |
 596|      |static void _strcat(char** str, int* written, size_t* len, char* from, int from_size)
 597| 1.34k|{
 598| 1.34k|  if (*len - *written < from_size)
 599|     0|  {
 600|     0|    *len += from_size * 2;
 601|     0|    *str = (char*)ccrealloc(*str, *len);
 602|     0|  }
 603| 1.34k|  memcpy(*str + *written, from, from_size);
 604| 1.34k|  *written += from_size;
 605| 1.34k|}
 606|      |
 607|   648|#define _STRPRINTF(str, written, len, format, ...) \
 608|   648|do { \
 609|   648|  const int newly_written = snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
 610|   648|  if ((len) - (written) < newly_written) \
 611|   648|  { \
 612|     0|    (len) += newly_written * 2; \
 613|     0|    (str) = (char*)ccrealloc((str), (len)); \
 614|     0|    (written) += snprintf((str) + (written), (len) - (written), format, ## __VA_ARGS__); \
 615|     0|  } else \
 616|   648|    (written) += newly_written; \
 617|   648|} while (0)
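
snprintf returns the number of characters it would have written, so one failed attempt tells the caller exactly how much to grow before retrying; that is what the macro exploits. The same pattern as a variadic function, which needs va_copy because the argument list is consumed twice (str_printf is an illustrative name, not part of ccv):

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>

static void str_printf(char** str, int* written, size_t* len, const char* format, ...)
{
  va_list args, args2;
  va_start(args, format);
  va_copy(args2, args); // keep a second copy for the possible retry
  const int needed = vsnprintf(*str + *written, *len - *written, format, args);
  if (*len - *written < (size_t)(needed + 1)) {
    // Didn't fit: grow the buffer and format again from the saved copy.
    *len += (size_t)needed * 2;
    *str = realloc(*str, *len);
    *written += vsnprintf(*str + *written, *len - *written, format, args2);
  } else
    *written += needed;
  va_end(args2);
  va_end(args);
}

int main(void)
{
  size_t len = 8;
  int written = 0;
  char* str = malloc(len);
  str_printf(&str, &written, &len, "%10.5g, ", 3.14159);
  str_printf(&str, &written, &len, "%10d", 42);
  printf("%s\n", str);
  free(str);
  return 0;
}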
 618|      |
 619|      |static void _strv(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int i)
 620|   648|{
 621|   648|  if (a->info.datatype == CCV_32F)
 622|     0|    _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f32[i]);
 623|   648|  else if (a->info.datatype == CCV_64F)
 624|     0|    _STRPRINTF(*str, *written, *len, "%10.5g", a->data.f64[i]);
 625|   648|  else if (a->info.datatype == CCV_16F) {
 626|     0|    float v;
 627|     0|    ccv_half_precision_to_float((uint16_t*)(a->data.f16 + i), &v, 1);
 628|     0|    _STRPRINTF(*str, *written, *len, "%10.5g", v);
 629|   648|  } else if (a->info.datatype == CCV_32S)
 630|   648|    _STRPRINTF(*str, *written, *len, "%10d", a->data.i32[i]);
 631|     0|  else if (a->info.datatype == CCV_64S)
 632|     0|    _STRPRINTF(*str, *written, *len, "%12lld", (long long int)a->data.i64[i]);
 633|     0|  else if (a->info.datatype == CCV_8U)
 634|     0|    _STRPRINTF(*str, *written, *len, "%3d", (int)a->data.u8[i]);
 635|   648|}
 636|      |
 637|      |static void _strt(char** str, int* written, size_t* len, const ccv_nnc_tensor_t* const a, int nd, int spacer, const int* const dim, const int* const stride, int idx)
 638|    28|{
 639|    28|  assert(nd != 1);
 640|    28|  if (nd == 2)
 641|    17|  {
 642|      |    // Print the columns and rows.
 643|    17|    int i, j, k;
 644|    17|    if (dim[0] <= 8)
 645|     1|    {
 646|     5|      for (i = 0; i < dim[0]; i++)
 647|     4|      {
 648|     4|        if (i != 0)
 649|     3|        {
 650|     3|          _strcat(str, written, len, "  ", 2);
 651|     3|          for (k = 0; k < spacer; k++)
 652|     0|            _strcat(str, written, len, " ", 1);
 653|     3|        }
 654|     4|        _strcat(str, written, len, "[", 1);
 655|     4|        if (dim[1] <= 8)
 656|     4|        {
 657|    20|          for (j = 0; j < dim[1]; j++)
 658|    16|          {
 659|    16|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 660|    16|            if (j < dim[1] - 1)
 661|    12|              _strcat(str, written, len, ", ", 2);
 662|    16|          }
 663|     4|          if (i < dim[0] - 1)
 664|     3|            _strcat(str, written, len, "],\n", 3);
 665|     4|        } else {
 666|     0|          for (j = 0; j < 3; j++)
 667|     0|          {
 668|     0|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 669|     0|            _strcat(str, written, len, ", ", 2);
 670|     0|          }
 671|     0|          _strcat(str, written, len, " ..., ", 6);
 672|     0|          for (j = dim[1] - 3; j < dim[1]; j++)
 673|     0|          {
 674|     0|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 675|     0|            if (j < dim[1] - 1)
 676|     0|              _strcat(str, written, len, ", ", 2);
 677|     0|          }
 678|     0|          if (i < dim[0] - 1)
 679|     0|            _strcat(str, written, len, "],\n", 3);
 680|     0|        }
 681|     4|      }
 682|     1|      _strcat(str, written, len, "]", 1);
 683|    16|    } else {
 684|    64|      for (i = 0; i < 3; i++)
 685|    48|      {
 686|    48|        if (i != 0)
 687|    32|        {
 688|    32|          _strcat(str, written, len, "  ", 2);
 689|   128|          for (k = 0; k < spacer; k++)
 690|    96|            _strcat(str, written, len, " ", 1);
 691|    32|        }
 692|    48|        _strcat(str, written, len, "[", 1);
 693|    48|        if (dim[1] <= 8)
 694|     0|        {
 695|     0|          for (j = 0; j < dim[1]; j++)
 696|     0|          {
 697|     0|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 698|     0|            if (j < dim[1] - 1)
 699|     0|              _strcat(str, written, len, ", ", 2);
 700|     0|          }
 701|     0|          _strcat(str, written, len, "],\n", 3);
 702|    48|        } else {
 703|   192|          for (j = 0; j < 3; j++)
 704|   144|          {
 705|   144|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 706|   144|            _strcat(str, written, len, ", ", 2);
 707|   144|          }
 708|    48|          _strcat(str, written, len, " ..., ", 6);
 709|   192|          for (j = dim[1] - 3; j < dim[1]; j++)
 710|   144|          {
 711|   144|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 712|   144|            if (j < dim[1] - 1)
 713|    96|              _strcat(str, written, len, ", ", 2);
 714|   144|          }
 715|    48|          _strcat(str, written, len, "],\n", 3);
 716|    48|        }
 717|    48|      }
 718|    16|      _strcat(str, written, len, "  ", 2);
 719|    64|      for (k = 0; k < spacer; k++)
 720|    48|        _strcat(str, written, len, " ", 1);
 721|    16|      _strcat(str, written, len, "...,\n", 5);
 722|    64|      for (i = dim[0] - 3; i < dim[0]; i++)
 723|    48|      {
 724|    48|        _strcat(str, written, len, "  ", 2);
 725|   192|        for (k = 0; k < spacer; k++)
 726|   144|          _strcat(str, written, len, " ", 1);
 727|    48|        _strcat(str, written, len, "[", 1);
 728|    48|        if (dim[1] < 8)
 729|     0|        {
 730|     0|          for (j = 0; j < dim[1]; j++)
 731|     0|          {
 732|     0|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 733|     0|            if (j < dim[1] - 1)
 734|     0|              _strcat(str, written, len, ", ", 2);
 735|     0|          }
 736|     0|          if (i < dim[0] - 1)
 737|     0|            _strcat(str, written, len, "],\n", 3);
 738|    48|        } else {
 739|   192|          for (j = 0; j < 3; j++)
 740|   144|          {
 741|   144|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 742|   144|            _strcat(str, written, len, ", ", 2);
 743|   144|          }
 744|    48|          _strcat(str, written, len, " ..., ", 6);
 745|   192|          for (j = dim[1] - 3; j < dim[1]; j++)
 746|   144|          {
 747|   144|            _strv(str, written, len, a, idx + i * stride[0] + j * stride[1]);
 748|   144|            if (j < dim[1] - 1)
 749|    96|              _strcat(str, written, len, ", ", 2);
 750|   144|          }
 751|    48|          if (i < dim[0] - 1)
 752|    32|            _strcat(str, written, len, "],\n", 3);
 753|    48|        }
 754|    48|      }
 755|    16|      _strcat(str, written, len, "]", 1);
 756|    16|    }
 757|    17|    return;
 758|    17|  }
 759|    11|  int i, j;
 760|    11|  if (dim[0] > 4)
 761|     2|  {
 762|     6|    for (i = 0; i < 2; i++)
 763|     4|    {
 764|     4|      _strcat(str, written, len, "[", 1);
 765|     4|      _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
 766|     4|      _strcat(str, written, len, "],\n  ", 5);
 767|     8|      for (j = 0; j < spacer; j++)
 768|     4|        _strcat(str, written, len, " ", 1);
 769|     4|    }
 770|     2|    _strcat(str, written, len, "...,\n", 5);
 771|     2|    _strcat(str, written, len, "  ", 2);
 772|     4|    for (j = 0; j < spacer; j++)
 773|     2|      _strcat(str, written, len, " ", 1);
 774|     6|    for (i = dim[0] - 2; i < dim[0]; i++)
 775|     4|    {
 776|     4|      _strcat(str, written, len, "[", 1);
 777|     4|      _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
 778|     4|      if (i < dim[0] - 1)
 779|     2|      {
 780|     2|        _strcat(str, written, len, "],\n  ", 5);
 781|     4|        for (j = 0; j < spacer; j++)
 782|     2|          _strcat(str, written, len, " ", 1);
 783|     2|      }
 784|     4|    }
 785|     2|    _strcat(str, written, len, "]", 1);
 786|     9|  } else {
 787|    27|    for (i = 0; i < dim[0]; i++)
 788|    18|    {
 789|    18|      _strcat(str, written, len, "[", 1);
 790|    18|      _strt(str, written, len, a, nd - 1, spacer + 1, dim + 1, stride + 1, idx + stride[0] * i);
 791|    18|      if (i < dim[0] - 1)
 792|     9|      {
 793|     9|        _strcat(str, written, len, "],\n", 3);
 794|     9|        _strcat(str, written, len, "  ", 2);
 795|    25|        for (j = 0; j < spacer; j++)
 796|    16|          _strcat(str, written, len, " ", 1);
 797|     9|      }
 798|    18|    }
 799|     9|    _strcat(str, written, len, "]", 1);
 800|     9|  }
 801|    11|}
 802|      |
 803|      |char* ccv_nnc_tensor_format_new(const ccv_nnc_tensor_t* const a)
 804|     4|{
 805|     4|  const int nd = ccv_nnc_tensor_nd(a->info.dim);
 806|     4|  int i;
 807|     4|  int rows = 8; // 8 rows for the first one, and then just first and last.
 808|     7|  for (i = 2; i < nd; i++)
 809|     3|    rows *= 5; // Maximum 3 rows beyond the first two.
 810|     4|  int columns = nd * 2 + 16 * 8;
 811|     4|  size_t len = sizeof(char) * columns * rows;
 812|      |  // Allocate the return string buffer.
 813|     4|  char* str = (char*)ccmalloc(len);
 814|     4|  int written = 0;
 815|     4|  int stride[CCV_NNC_MAX_DIM_ALLOC];
 816|     4|  if (CCV_IS_TENSOR_VIEW(a))
 817|     0|    memcpy(stride, ((ccv_nnc_tensor_view_t*)a)->stride, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC);
 818|     4|  else
 819|     4|    ccv_nnc_tensor_get_stride(a->info.dim, stride);
 820|     4|  _strcat(&str, &written, &len, "[\n  ", 4);
 821|     4|  if (nd == 1)
 822|     2|  {
 823|      |    // Special-case vectors.
 824|     2|    if (a->info.dim[0] <= 64)
 825|    13|      for (i = 0; i < a->info.dim[0]; i++)
 826|    12|      {
 827|    12|        _strv(&str, &written, &len, a, i * stride[0]);
 828|    12|        if (i < a->info.dim[0] - 1)
 829|    11|        {
 830|    11|          if ((i + 1) % 8 == 0)
 831|     1|            _strcat(&str, &written, &len, ",\n  ", 4);
 832|    10|          else
 833|    10|            _strcat(&str, &written, &len, ", ", 2);
 834|    11|        }
 835|    12|      }
 836|     1|    else {
 837|      |      // First 3 rows.
 838|    25|      for (i = 0; i < 24; i++)
 839|    24|      {
 840|    24|        _strv(&str, &written, &len, a, i * stride[0]);
 841|    24|        if ((i + 1) % 8 == 0)
 842|     3|          _strcat(&str, &written, &len, ",\n  ", 4);
 843|    21|        else
 844|    21|          _strcat(&str, &written, &len, ", ", 2);
 845|    24|      }
 846|     1|      _strcat(&str, &written, &len, "...,\n  ", 7);
 847|      |      // Last 3 rows (aligned to 8 items per row).
 848|     1|      int start = ((a->info.dim[0] + 7) / 8 - 3) * 8;
 849|    21|      for (i = start; i < a->info.dim[0]; i++)
 850|    20|      {
 851|    20|        _strv(&str, &written, &len, a, i * stride[0]);
 852|    20|        if (i < a->info.dim[0] - 1)
 853|    19|        {
 854|    19|          if ((i + 1) % 8 == 0)
 855|     2|            _strcat(&str, &written, &len, ",\n  ", 4);
 856|    17|          else
 857|    17|            _strcat(&str, &written, &len, ", ", 2);
 858|    19|        }
 859|    20|      }
 860|     1|    }
 861|     2|  } else {
 862|     2|    _strt(&str, &written, &len, a, nd, 0, a->info.dim, stride, 0);
 863|     2|  }
 864|     4|  _strcat(&str, &written, &len, "\n]", 3); // Including the terminating \0.
 865|     4|  str = (char*)ccrealloc(str, written); // Don't need the extra space.
 866|     4|  return str;
 867|     4|}
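
For reference, a hypothetical end-to-end use of the formatter. The ccv_nnc_tensor_param_t fields (type, format, datatype, dim) and the constants all appear in the source above, but treating the struct as designated-initializable and freeing the string with free (on the assumption that ccmalloc/ccrealloc default to malloc/realloc) are assumptions, not documented API:

#include <stdio.h>
#include <stdlib.h>
#include "ccv_nnc.h"

int main(void)
{
  ccv_nnc_tensor_param_t params = {
    .type = CCV_TENSOR_CPU_MEMORY,
    .format = CCV_TENSOR_FORMAT_NHWC,
    .datatype = CCV_32F,
    .dim = {2, 3}, // a 2x3 matrix; dim[2] == 0, so no dense-matrix bridging
  };
  ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, params, 0);
  for (int i = 0; i < 6; i++)
    a->data.f32[i] = i * 0.5f;
  char* const str = ccv_nnc_tensor_format_new(a);
  printf("%s\n", str);
  free(str);
  ccv_nnc_tensor_free(a);
  return 0;
}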