File: | nnc/ccv_cnnp_dataframe_addons.c |
Warning: | line 475, column 27 Array access (via field 'f32') results in a null pointer dereference |
Press '?' to see keyboard shortcuts
Keyboard shortcuts:
1 | #include "ccv_nnc.h" | |||
2 | #include "ccv_nnc_easy.h" | |||
3 | #include "ccv_nnc_internal.h" | |||
4 | #include "ccv_internal.h" | |||
5 | #include "_ccv_cnnp_dataframe.h" | |||
6 | #include "3rdparty/sfmt/SFMT.h" | |||
7 | ||||
8 | // MARK - Create Dataframe from Array | |||
9 | ||||
10 | static void _ccv_cnnp_array_enum(const int column_idx, const int* const row_idxs, const int row_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
11 | { | |||
12 | int i; | |||
13 | ccv_array_t* const array = (ccv_array_t*)context; | |||
14 | for (i = 0; i < row_size; i++) | |||
15 | data[i] = ccv_array_get(array, row_idxs[i])((void*)(((char*)((array)->data)) + (size_t)(array)->rsize * (size_t)(row_idxs[i]))); | |||
16 | } | |||
17 | ||||
18 | ccv_cnnp_dataframe_t* ccv_cnnp_dataframe_from_array_new(ccv_array_t* const array) | |||
19 | { | |||
20 | const ccv_cnnp_column_data_t array_column_data = { | |||
21 | .data_enum = _ccv_cnnp_array_enum, | |||
22 | .context = array | |||
23 | }; | |||
24 | return ccv_cnnp_dataframe_new(&array_column_data, 1, array->rnum); | |||
25 | } | |||
26 | ||||
27 | typedef struct { | |||
28 | ccv_cnnp_dataframe_tuple_t tuple; | |||
29 | int tensor_offset; | |||
30 | int device_id; | |||
31 | } ccv_cnnp_copy_to_gpu_context_t; | |||
32 | ||||
33 | // MARK - Copy Tensors from CPU to GPU | |||
34 | ||||
35 | static void _ccv_cnnp_tensor_list_deinit(void* const data, void* const context) | |||
36 | { | |||
37 | ccv_cnnp_dataframe_tuple_t* const tuple = (ccv_cnnp_dataframe_tuple_t*)context; | |||
38 | ccv_nnc_tensor_t** const tensor_list = (ccv_nnc_tensor_t**)data; | |||
39 | int i; | |||
40 | for (i = 0; i < tuple->size; i++) | |||
41 | if (tensor_list[i]) | |||
42 | ccv_nnc_tensor_free(tensor_list[i]); | |||
43 | ccfreefree(tensor_list); | |||
44 | } | |||
45 | ||||
46 | static void _ccv_cnnp_copy_to_gpu(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
47 | { | |||
48 | const ccv_cnnp_copy_to_gpu_context_t* const copy_to_gpu_context = (ccv_cnnp_copy_to_gpu_context_t*)context; | |||
49 | int i, j; | |||
50 | for (i = 0; i < batch_size; i++) | |||
51 | { | |||
52 | ccv_nnc_tensor_t* const* const inputs = (ccv_nnc_tensor_t* const*)column_data[0][i] + copy_to_gpu_context->tensor_offset; | |||
53 | ccv_nnc_tensor_t** outputs = (ccv_nnc_tensor_t**)data[i]; | |||
54 | if (!outputs) | |||
55 | outputs = (ccv_nnc_tensor_t**)(data[i] = cccalloccalloc(copy_to_gpu_context->tuple.size, sizeof(ccv_nnc_tensor_t*))); | |||
56 | for (j = 0; j < copy_to_gpu_context->tuple.size; j++) | |||
57 | { | |||
58 | ccv_nnc_tensor_param_t params = inputs[j]->info; | |||
59 | params.type &= ~CCV_TENSOR_CPU_MEMORY; | |||
60 | params.type |= CCV_TENSOR_GPU_MEMORY; // Change to GPU memory. | |||
61 | CCV_TENSOR_SET_DEVICE_ID(params.type, copy_to_gpu_context->device_id)(params.type) = (((params.type) & ~0xfff00) | (((copy_to_gpu_context ->device_id) & 0xfff) << 8)); | |||
62 | outputs[j] = outputs[j] ? ccv_nnc_tensor_resize(outputs[j], params) : ccv_nnc_tensor_new(0, params, 0); | |||
63 | ccv_nnc_tensor_pin_memory(inputs[j]); | |||
64 | } | |||
65 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD()ccv_nnc_cmd(CCV_NNC_DATA_TRANSFER_FORWARD, 0, ccv_nnc_cmd_auto , 0), ccv_nnc_no_hint, 0, inputs, copy_to_gpu_context->tuple.size, outputs, copy_to_gpu_context->tuple.size, stream_context); | |||
66 | } | |||
67 | } | |||
68 | ||||
69 | int ccv_cnnp_dataframe_copy_to_gpu(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const int tensor_offset, const int tensor_size, const int device_id, const char* name) | |||
70 | { | |||
71 | assert(tensor_size > 0)((void) sizeof ((tensor_size > 0) ? 1 : 0), __extension__ ( { if (tensor_size > 0) ; else __assert_fail ("tensor_size > 0" , "ccv_cnnp_dataframe_addons.c", 71, __extension__ __PRETTY_FUNCTION__ ); })); | |||
72 | int stream_type = CCV_STREAM_CONTEXT_GPU; | |||
73 | CCV_STREAM_SET_DEVICE_ID(stream_type, device_id)(stream_type) = (((stream_type) & ~0xfff00) | (((device_id ) & 0xfff) << 8)); | |||
74 | ccv_cnnp_copy_to_gpu_context_t* const copy_to_gpu_context = (ccv_cnnp_copy_to_gpu_context_t*)ccmallocmalloc(sizeof(ccv_cnnp_copy_to_gpu_context_t)); | |||
75 | copy_to_gpu_context->tuple.size = tensor_size; | |||
76 | copy_to_gpu_context->tensor_offset = tensor_offset; | |||
77 | copy_to_gpu_context->device_id = device_id; | |||
78 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_copy_to_gpu, stream_type, _ccv_cnnp_tensor_list_deinit, COLUMN_ID_LIST(column_idx)(const int []){column_idx}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), copy_to_gpu_context, (ccv_cnnp_column_data_context_deinit_f)ccfreefree, name); | |||
79 | } | |||
80 | ||||
81 | // MARK - Use Command to Generate Output Tuple | |||
82 | ||||
83 | typedef struct { | |||
84 | ccv_cnnp_dataframe_tuple_t tuple; | |||
85 | int input_offset; | |||
86 | int input_size; | |||
87 | ccv_nnc_cmd_t cmd; | |||
88 | ccv_nnc_hint_t hint; | |||
89 | int flags; | |||
90 | ccv_nnc_tensor_param_t output_params[1]; | |||
91 | } ccv_cnnp_cmd_exec_context_t; | |||
92 | ||||
93 | static void _ccv_cnnp_dataframe_cmd_exec(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
94 | { | |||
95 | const ccv_cnnp_cmd_exec_context_t* const cmd_exec_context = (ccv_cnnp_cmd_exec_context_t*)context; | |||
96 | int i, j; | |||
97 | for (i = 0; i < batch_size; i++) | |||
98 | { | |||
99 | ccv_nnc_tensor_t* const* const inputs = (ccv_nnc_tensor_t* const*)column_data[0][i] + cmd_exec_context->input_offset; | |||
100 | ccv_nnc_tensor_t** outputs = (ccv_nnc_tensor_t**)data[i]; | |||
101 | if (!outputs) | |||
102 | { | |||
103 | outputs = (ccv_nnc_tensor_t**)(data[i] = ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * cmd_exec_context->tuple.size)); | |||
104 | for (j = 0; j < cmd_exec_context->tuple.size; j++) | |||
105 | outputs[j] = ccv_nnc_tensor_new(0, cmd_exec_context->output_params[j], 0); | |||
106 | } | |||
107 | ccv_nnc_cmd_exec(cmd_exec_context->cmd, cmd_exec_context->hint, cmd_exec_context->flags, inputs, cmd_exec_context->input_size, outputs, cmd_exec_context->tuple.size, stream_context); | |||
108 | } | |||
109 | } | |||
110 | ||||
111 | int ccv_cnnp_dataframe_cmd_exec(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const int input_offset, const int input_size, const ccv_nnc_tensor_param_t* const output_params, const int output_size, const int stream_type, const char* name) | |||
112 | { | |||
113 | assert(input_size > 0)((void) sizeof ((input_size > 0) ? 1 : 0), __extension__ ( { if (input_size > 0) ; else __assert_fail ("input_size > 0" , "ccv_cnnp_dataframe_addons.c", 113, __extension__ __PRETTY_FUNCTION__ ); })); | |||
114 | assert(output_size > 0)((void) sizeof ((output_size > 0) ? 1 : 0), __extension__ ( { if (output_size > 0) ; else __assert_fail ("output_size > 0" , "ccv_cnnp_dataframe_addons.c", 114, __extension__ __PRETTY_FUNCTION__ ); })); | |||
115 | ccv_cnnp_cmd_exec_context_t* const cmd_exec_context = (ccv_cnnp_cmd_exec_context_t*)ccmallocmalloc(sizeof(ccv_cnnp_cmd_exec_context_t) + sizeof(ccv_nnc_tensor_param_t) * (output_size - 1)); | |||
116 | cmd_exec_context->tuple.size = output_size; | |||
117 | cmd_exec_context->input_offset = input_offset; | |||
118 | cmd_exec_context->input_size = input_size; | |||
119 | cmd_exec_context->cmd = cmd; | |||
120 | cmd_exec_context->hint = hint; | |||
121 | cmd_exec_context->flags = flags; | |||
122 | memcpy(cmd_exec_context->output_params, output_params, sizeof(ccv_nnc_tensor_param_t) * output_size); | |||
123 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_dataframe_cmd_exec, stream_type, _ccv_cnnp_tensor_list_deinit, COLUMN_ID_LIST(column_idx)(const int []){column_idx}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), cmd_exec_context, (ccv_cnnp_column_data_context_deinit_f)ccfreefree, name); | |||
124 | return 0; | |||
125 | } | |||
126 | ||||
127 | // MARK - Make Auxiliary Tensor as a new Column | |||
128 | ||||
129 | static void _ccv_cnnp_tensor_deinit(void* const data, void* const context) | |||
130 | { | |||
131 | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)data); | |||
132 | } | |||
133 | ||||
134 | static void _ccv_cnnp_tensor_new(const int column_idx, const int* const row_idxs, const int row_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
135 | { | |||
136 | ccv_nnc_tensor_param_t params = *(ccv_nnc_tensor_param_t*)context; | |||
137 | int i; | |||
138 | for (i = 0; i < row_size; i++) | |||
139 | if (!data[i]) | |||
140 | data[i] = ccv_nnc_tensor_new(0, params, 0); | |||
141 | } | |||
142 | ||||
143 | int ccv_cnnp_dataframe_add_aux(ccv_cnnp_dataframe_t* const dataframe, const ccv_nnc_tensor_param_t params, const char* name) | |||
144 | { | |||
145 | int stream_type = CCV_TENSOR_GET_MEMORY(params.type)((params.type) & 0x3) == CCV_TENSOR_CPU_MEMORY ? 0 : CCV_STREAM_CONTEXT_GPU; | |||
146 | if (stream_type == CCV_STREAM_CONTEXT_GPU) | |||
147 | CCV_STREAM_SET_DEVICE_ID(stream_type, CCV_TENSOR_GET_DEVICE_ID(params.type))(stream_type) = (((stream_type) & ~0xfff00) | ((((((params .type) & 0xfff00) >> 8)) & 0xfff) << 8)); | |||
148 | ccv_nnc_tensor_param_t* const context = (ccv_nnc_tensor_param_t*)ccmallocmalloc(sizeof(ccv_nnc_tensor_param_t)); | |||
149 | context[0] = params; | |||
150 | return ccv_cnnp_dataframe_add(dataframe, _ccv_cnnp_tensor_new, stream_type, _ccv_cnnp_tensor_deinit, context, (ccv_cnnp_column_data_context_deinit_f)ccfreefree, name); | |||
151 | } | |||
152 | ||||
153 | // MARK - Load Tensor from File Path | |||
154 | ||||
// Row deinit for image columns: the row data is a ccv matrix, released with
// ccv_matrix_free. The context is not used.
static void _ccv_cnnp_image_deinit(void* const data, void* const context)
{
	void* const image = data;
	ccv_matrix_free(image);
}
159 | ||||
160 | static void _ccv_cnnp_read_image(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
161 | { | |||
162 | parallel_for(i, batch_size){ int i; for ((i) = 0; (i) < (batch_size); (i)++) { { | |||
163 | if (data[i]) | |||
164 | ccv_matrix_free(data[i]); | |||
165 | off_t structof = (off_t)context; | |||
166 | const char* const filename = *(char* const*)((const char*)column_data[0][i] + structof); | |||
167 | data[i] = 0; | |||
168 | ccv_read(filename, (ccv_dense_matrix_t**)&data[i], CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR)ccv_read_impl(filename, (ccv_dense_matrix_t**)&data[i], CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR, 0, 0, 0); | |||
169 | } parallel_endfor} } | |||
170 | } | |||
171 | ||||
172 | int ccv_cnnp_dataframe_read_image(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof, const char* name) | |||
173 | { | |||
174 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_read_image, 0, _ccv_cnnp_image_deinit, COLUMN_ID_LIST(column_idx)(const int []){column_idx}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), (void*)(uintptr_t)structof, 0, name); | |||
175 | } | |||
176 | ||||
177 | // MARK - Apply Random Jitter to Image | |||
178 | ||||
179 | typedef struct { | |||
180 | sfmt_t sfmt; | |||
181 | int datatype; | |||
182 | ccv_cnnp_random_jitter_t random_jitter; | |||
183 | } ccv_cnnp_random_jitter_context_t; | |||
184 | ||||
185 | static void _ccv_cnnp_image_lighting(ccv_dense_matrix_t* image, const float alpha_r, const float alpha_g, const float alpha_b) | |||
186 | { | |||
187 | assert(CCV_GET_DATA_TYPE(image->type) == CCV_32F)((void) sizeof ((((image->type) & 0xFF000) == CCV_32F) ? 1 : 0), __extension__ ({ if (((image->type) & 0xFF000 ) == CCV_32F) ; else __assert_fail ("CCV_GET_DATA_TYPE(image->type) == CCV_32F" , "ccv_cnnp_dataframe_addons.c", 187, __extension__ __PRETTY_FUNCTION__ ); })); | |||
188 | assert(CCV_GET_CHANNEL(image->type) == CCV_C3)((void) sizeof ((((image->type) & 0xFFF) == CCV_C3) ? 1 : 0), __extension__ ({ if (((image->type) & 0xFFF) == CCV_C3) ; else __assert_fail ("CCV_GET_CHANNEL(image->type) == CCV_C3" , "ccv_cnnp_dataframe_addons.c", 188, __extension__ __PRETTY_FUNCTION__ ); })); | |||
189 | // These eigenvector values can be computed out of imageNet dataset (see ccv_convnet for how that is done). Here I just copied | |||
190 | // from mxnet: https://github.com/apache/incubator-mxnet/blob/master/src/operator/image/image_random-inl.h#L632 | |||
191 | const float pca_r = alpha_r * (55.46 * -0.5675) + alpha_g * (4.794 * 0.7192) + alpha_b * (1.148 * 0.4009); | |||
192 | const float pca_g = alpha_r * (55.46 * -0.5808) + alpha_g * (4.794 * -0.0045) + alpha_b * (1.148 * -0.8140); | |||
193 | const float pca_b = alpha_r * (55.46 * -0.5836) + alpha_g * (4.794 * -0.6948) + alpha_b * (1.148 * 0.4203); | |||
194 | int i; | |||
195 | const int size = image->rows * image->cols; | |||
196 | float* const ptr = image->data.f32; | |||
197 | for (i = 0; i < size; i++) | |||
198 | { | |||
199 | ptr[i * 3] = ccv_clamp(ptr[i * 3] + pca_r, 0, 255)({ typeof (0) _a = (0); typeof (255) _b = (255); typeof (ptr[ i * 3] + pca_r) _x = (ptr[i * 3] + pca_r); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | |||
200 | ptr[i * 3 + 1] = ccv_clamp(ptr[i * 3 + 1] + pca_g, 0, 255)({ typeof (0) _a = (0); typeof (255) _b = (255); typeof (ptr[ i * 3 + 1] + pca_g) _x = (ptr[i * 3 + 1] + pca_g); (_x < _a ) ? _a : ((_x > _b) ? _b : _x); }); | |||
201 | ptr[i * 3 + 2] = ccv_clamp(ptr[i * 3 + 2] + pca_b, 0, 255)({ typeof (0) _a = (0); typeof (255) _b = (255); typeof (ptr[ i * 3 + 2] + pca_b) _x = (ptr[i * 3 + 2] + pca_b); (_x < _a ) ? _a : ((_x > _b) ? _b : _x); }); | |||
202 | } | |||
203 | } | |||
204 | ||||
205 | static float _ccv_cnnp_random_logexp(sfmt_t* const sfmt, const float jitter) | |||
206 | { | |||
207 | // We want to get something around logarithmic scale, thus, 0 is no good, and infinity is no good. 1 is the same. | |||
208 | // jitter is some turbulence we want around 1. We want the range range to be around [1 / (1 + jitter), 1 + jitter] | |||
209 | // but the distribution is not uniform (50% fall under 1, and 50% fall above 1). The way to do this is to first | |||
210 | // get to logarithmic range, doing a uniform sampling, and then convert back. | |||
211 | double log_jitter_limit = log(1 + jitter); | |||
212 | double log_random_jitter = sfmt_genrand_real1(sfmt) * 2 * log_jitter_limit - log_jitter_limit; | |||
213 | return (float)exp(log_random_jitter); // Convert it back to exponential form. | |||
214 | } | |||
215 | ||||
216 | static void _ccv_cnnp_image_manip(ccv_dense_matrix_t* image, const ccv_cnnp_random_jitter_t random_jitter, sfmt_t* const sfmt) | |||
217 | { | |||
218 | assert(sfmt && CCV_GET_CHANNEL(image->type) == CCV_C3)((void) sizeof ((sfmt && ((image->type) & 0xFFF ) == CCV_C3) ? 1 : 0), __extension__ ({ if (sfmt && ( (image->type) & 0xFFF) == CCV_C3) ; else __assert_fail ("sfmt && CCV_GET_CHANNEL(image->type) == CCV_C3" , "ccv_cnnp_dataframe_addons.c", 218, __extension__ __PRETTY_FUNCTION__ ); })); | |||
219 | int idx[4] = {0, 1, 2, 3}; | |||
220 | sfmt_genrand_shuffle(sfmt, idx, 4, sizeof(int)); | |||
221 | int i; | |||
222 | for (i = 0; i < 4; i++) | |||
223 | // change the applying order | |||
224 | switch (idx[i]) | |||
225 | { | |||
226 | case 0: | |||
227 | if (random_jitter.brightness == 0) | |||
228 | break; | |||
229 | // introduce some brightness changes to the original image | |||
230 | ccv_scale(image, (ccv_matrix_t**)&image, 0, _ccv_cnnp_random_logexp(sfmt, random_jitter.brightness)); | |||
231 | break; | |||
232 | case 1: | |||
233 | // introduce some saturation changes to the original image | |||
234 | if (random_jitter.saturation == 0) | |||
235 | break; | |||
236 | ccv_saturation(image, &image, 0, _ccv_cnnp_random_logexp(sfmt, random_jitter.saturation)); | |||
237 | break; | |||
238 | case 2: | |||
239 | // introduce some contrast changes to the original image | |||
240 | if (random_jitter.contrast == 0) | |||
241 | break; | |||
242 | ccv_contrast(image, &image, 0, _ccv_cnnp_random_logexp(sfmt, random_jitter.contrast)); | |||
243 | break; | |||
244 | case 3: | |||
245 | if (random_jitter.lighting == 0) | |||
246 | break; | |||
247 | _ccv_cnnp_image_lighting(image, sfmt_genrand_real1(sfmt) * random_jitter.lighting, sfmt_genrand_real1(sfmt) * random_jitter.lighting, sfmt_genrand_real1(sfmt) * random_jitter.lighting); | |||
248 | break; | |||
249 | } | |||
250 | } | |||
251 | ||||
252 | static void _ccv_cnnp_normalize(ccv_dense_matrix_t* const image, const float mean[3], const float inv_std[3]) | |||
253 | { | |||
254 | int i; | |||
255 | const int count = image->rows * image->cols; | |||
256 | float* ap = image->data.f32; | |||
257 | for (i = 0; i < count; i++) | |||
258 | { | |||
259 | ap[i * 3] = (ap[i * 3] - mean[0]) * inv_std[0]; | |||
260 | ap[i * 3 + 1] = (ap[i * 3 + 1] - mean[1]) * inv_std[1]; | |||
261 | ap[i * 3 + 2] = (ap[i * 3 + 2] - mean[2]) * inv_std[2]; | |||
262 | } | |||
263 | } | |||
264 | ||||
265 | static void _ccv_cnnp_random_jitter(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
266 | { | |||
267 | sfmt_t* const sfmt = (sfmt_t*)ccmallocmalloc(sizeof(sfmt_t) * batch_size); | |||
268 | ccv_cnnp_random_jitter_context_t* const ctx = (ccv_cnnp_random_jitter_context_t*)context; | |||
269 | int i; | |||
270 | for (i = 0; i < batch_size; i++) | |||
271 | sfmt_init_gen_rand(&sfmt[i], sfmt_genrand_uint32(&ctx->sfmt)); | |||
272 | const ccv_cnnp_random_jitter_t random_jitter = ctx->random_jitter; | |||
273 | assert(random_jitter.resize.min > 0)((void) sizeof ((random_jitter.resize.min > 0) ? 1 : 0), __extension__ ({ if (random_jitter.resize.min > 0) ; else __assert_fail ("random_jitter.resize.min > 0", "ccv_cnnp_dataframe_addons.c" , 273, __extension__ __PRETTY_FUNCTION__); })); | |||
274 | assert(random_jitter.resize.max >= random_jitter.resize.min)((void) sizeof ((random_jitter.resize.max >= random_jitter .resize.min) ? 1 : 0), __extension__ ({ if (random_jitter.resize .max >= random_jitter.resize.min) ; else __assert_fail ("random_jitter.resize.max >= random_jitter.resize.min" , "ccv_cnnp_dataframe_addons.c", 274, __extension__ __PRETTY_FUNCTION__ ); })); | |||
275 | parallel_for(i, batch_size){ int i; for ((i) = 0; (i) < (batch_size); (i)++) { { | |||
276 | if (data[i]) | |||
277 | ccv_matrix_free(data[i]); | |||
278 | ccv_dense_matrix_t* const input = (ccv_dense_matrix_t*)column_data[0][i]; | |||
279 | const int resize = ccv_clamp((int)(sfmt_genrand_real1(&sfmt[i]) * (random_jitter.resize.max - random_jitter.resize.min) + 0.5) + random_jitter.resize.min, random_jitter.resize.min, random_jitter.resize.max)({ typeof (random_jitter.resize.min) _a = (random_jitter.resize .min); typeof (random_jitter.resize.max) _b = (random_jitter. resize.max); typeof ((int)(sfmt_genrand_real1(&sfmt[i]) * (random_jitter.resize.max - random_jitter.resize.min) + 0.5) + random_jitter.resize.min) _x = ((int)(sfmt_genrand_real1(& sfmt[i]) * (random_jitter.resize.max - random_jitter.resize.min ) + 0.5) + random_jitter.resize.min); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | |||
280 | int resize_rows = ccv_max(resize, (int)(input->rows * (float)resize / input->cols + 0.5))({ typeof (resize) _a = (resize); typeof ((int)(input->rows * (float)resize / input->cols + 0.5)) _b = ((int)(input-> rows * (float)resize / input->cols + 0.5)); (_a > _b) ? _a : _b; }); | |||
281 | int resize_cols = ccv_max(resize, (int)(input->cols * (float)resize / input->rows + 0.5))({ typeof (resize) _a = (resize); typeof ((int)(input->cols * (float)resize / input->rows + 0.5)) _b = ((int)(input-> cols * (float)resize / input->rows + 0.5)); (_a > _b) ? _a : _b; }); | |||
282 | if (random_jitter.aspect_ratio > 0) | |||
283 | { | |||
284 | const float aspect_ratio = sqrtf(_ccv_cnnp_random_logexp(&sfmt[i], random_jitter.aspect_ratio)); | |||
285 | resize_rows = (int)(resize_rows * aspect_ratio + 0.5); | |||
286 | resize_cols = (int)(resize_cols / aspect_ratio + 0.5); | |||
287 | } | |||
288 | if (random_jitter.resize.roundup > 0) | |||
289 | { | |||
290 | const int roundup = random_jitter.resize.roundup; | |||
291 | const int roundup_2 = roundup / 2; | |||
292 | resize_rows = (resize_rows + roundup_2) / roundup * roundup; | |||
293 | resize_cols = (resize_cols + roundup_2) / roundup * roundup; | |||
294 | } | |||
295 | const int need_crop = (random_jitter.size.cols > 0 && random_jitter.size.rows > 0 && | |||
296 | ((resize_cols != random_jitter.size.cols || resize_rows != random_jitter.size.rows) || | |||
297 | (random_jitter.offset.x != 0 || random_jitter.offset.y != 0))); | |||
298 | int cropped = 0, crop_x = 0, crop_y = 0; | |||
299 | ccv_dense_matrix_t* sliced = 0; | |||
300 | if (need_crop) | |||
301 | { | |||
302 | // Compute crop x, y. | |||
303 | crop_x = random_jitter.center_crop ? | |||
304 | (resize_cols - random_jitter.size.cols + 1) / 2 : // Otherwise, random select x. | |||
305 | (int)(sfmt_genrand_real1(&sfmt[i]) * (resize_cols - random_jitter.size.cols + 1)); | |||
306 | crop_x = ccv_clamp(crop_x,({ typeof (({ typeof (0) _a = (0); typeof (resize_cols - random_jitter .size.cols) _b = (resize_cols - random_jitter.size.cols); (_a < _b) ? _a : _b; })) _a = (({ typeof (0) _a = (0); typeof (resize_cols - random_jitter.size.cols) _b = (resize_cols - random_jitter .size.cols); (_a < _b) ? _a : _b; })); typeof (({ typeof ( 0) _a = (0); typeof (resize_cols - random_jitter.size.cols) _b = (resize_cols - random_jitter.size.cols); (_a > _b) ? _a : _b; })) _b = (({ typeof (0) _a = (0); typeof (resize_cols - random_jitter.size.cols) _b = (resize_cols - random_jitter.size .cols); (_a > _b) ? _a : _b; })); typeof (crop_x) _x = (crop_x ); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }) | |||
307 | ccv_min(0, resize_cols - random_jitter.size.cols),({ typeof (({ typeof (0) _a = (0); typeof (resize_cols - random_jitter .size.cols) _b = (resize_cols - random_jitter.size.cols); (_a < _b) ? _a : _b; })) _a = (({ typeof (0) _a = (0); typeof (resize_cols - random_jitter.size.cols) _b = (resize_cols - random_jitter .size.cols); (_a < _b) ? _a : _b; })); typeof (({ typeof ( 0) _a = (0); typeof (resize_cols - random_jitter.size.cols) _b = (resize_cols - random_jitter.size.cols); (_a > _b) ? _a : _b; })) _b = (({ typeof (0) _a = (0); typeof (resize_cols - random_jitter.size.cols) _b = (resize_cols - random_jitter.size .cols); (_a > _b) ? _a : _b; })); typeof (crop_x) _x = (crop_x ); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }) | |||
308 | ccv_max(0, resize_cols - random_jitter.size.cols))({ typeof (({ typeof (0) _a = (0); typeof (resize_cols - random_jitter .size.cols) _b = (resize_cols - random_jitter.size.cols); (_a < _b) ? _a : _b; })) _a = (({ typeof (0) _a = (0); typeof (resize_cols - random_jitter.size.cols) _b = (resize_cols - random_jitter .size.cols); (_a < _b) ? _a : _b; })); typeof (({ typeof ( 0) _a = (0); typeof (resize_cols - random_jitter.size.cols) _b = (resize_cols - random_jitter.size.cols); (_a > _b) ? _a : _b; })) _b = (({ typeof (0) _a = (0); typeof (resize_cols - random_jitter.size.cols) _b = (resize_cols - random_jitter.size .cols); (_a > _b) ? _a : _b; })); typeof (crop_x) _x = (crop_x ); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | |||
309 | crop_y = random_jitter.center_crop ? | |||
310 | (resize_rows - random_jitter.size.rows + 1) / 2 : // Otherwise, random select y. | |||
311 | (int)(sfmt_genrand_real1(&sfmt[i]) * (resize_rows - random_jitter.size.rows + 1)); | |||
312 | crop_y = ccv_clamp(crop_y,({ typeof (({ typeof (0) _a = (0); typeof (resize_rows - random_jitter .size.rows) _b = (resize_rows - random_jitter.size.rows); (_a < _b) ? _a : _b; })) _a = (({ typeof (0) _a = (0); typeof (resize_rows - random_jitter.size.rows) _b = (resize_rows - random_jitter .size.rows); (_a < _b) ? _a : _b; })); typeof (({ typeof ( 0) _a = (0); typeof (resize_rows - random_jitter.size.rows) _b = (resize_rows - random_jitter.size.rows); (_a > _b) ? _a : _b; })) _b = (({ typeof (0) _a = (0); typeof (resize_rows - random_jitter.size.rows) _b = (resize_rows - random_jitter.size .rows); (_a > _b) ? _a : _b; })); typeof (crop_y) _x = (crop_y ); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }) | |||
313 | ccv_min(0, resize_rows - random_jitter.size.rows),({ typeof (({ typeof (0) _a = (0); typeof (resize_rows - random_jitter .size.rows) _b = (resize_rows - random_jitter.size.rows); (_a < _b) ? _a : _b; })) _a = (({ typeof (0) _a = (0); typeof (resize_rows - random_jitter.size.rows) _b = (resize_rows - random_jitter .size.rows); (_a < _b) ? _a : _b; })); typeof (({ typeof ( 0) _a = (0); typeof (resize_rows - random_jitter.size.rows) _b = (resize_rows - random_jitter.size.rows); (_a > _b) ? _a : _b; })) _b = (({ typeof (0) _a = (0); typeof (resize_rows - random_jitter.size.rows) _b = (resize_rows - random_jitter.size .rows); (_a > _b) ? _a : _b; })); typeof (crop_y) _x = (crop_y ); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }) | |||
314 | ccv_max(0, resize_rows - random_jitter.size.rows))({ typeof (({ typeof (0) _a = (0); typeof (resize_rows - random_jitter .size.rows) _b = (resize_rows - random_jitter.size.rows); (_a < _b) ? _a : _b; })) _a = (({ typeof (0) _a = (0); typeof (resize_rows - random_jitter.size.rows) _b = (resize_rows - random_jitter .size.rows); (_a < _b) ? _a : _b; })); typeof (({ typeof ( 0) _a = (0); typeof (resize_rows - random_jitter.size.rows) _b = (resize_rows - random_jitter.size.rows); (_a > _b) ? _a : _b; })) _b = (({ typeof (0) _a = (0); typeof (resize_rows - random_jitter.size.rows) _b = (resize_rows - random_jitter.size .rows); (_a > _b) ? _a : _b; })); typeof (crop_y) _x = (crop_y ); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | |||
315 | if (random_jitter.offset.x != 0) | |||
316 | crop_x += sfmt_genrand_real1(&sfmt[i]) * random_jitter.offset.x * 2 - random_jitter.offset.x; | |||
317 | if (random_jitter.offset.y != 0) | |||
318 | crop_y += sfmt_genrand_real1(&sfmt[i]) * random_jitter.offset.y * 2 - random_jitter.offset.y; | |||
319 | // If we can fill in the whole view (not introducing any 0 padding), we can first crop and then scale down / up. | |||
320 | if (resize_cols >= random_jitter.size.cols && resize_rows >= random_jitter.size.rows) | |||
321 | { | |||
322 | const float scale_x = (float)input->cols / resize_cols; | |||
323 | const float scale_y = (float)input->rows / resize_rows; | |||
324 | const int slice_cols = (int)(random_jitter.size.cols * scale_x + 0.5); | |||
325 | const int slice_rows = (int)(random_jitter.size.rows * scale_y + 0.5); | |||
326 | assert(slice_cols <= input->cols)((void) sizeof ((slice_cols <= input->cols) ? 1 : 0), __extension__ ({ if (slice_cols <= input->cols) ; else __assert_fail ("slice_cols <= input->cols", "ccv_cnnp_dataframe_addons.c" , 326, __extension__ __PRETTY_FUNCTION__); })); | |||
327 | assert(slice_rows <= input->rows)((void) sizeof ((slice_rows <= input->rows) ? 1 : 0), __extension__ ({ if (slice_rows <= input->rows) ; else __assert_fail ("slice_rows <= input->rows", "ccv_cnnp_dataframe_addons.c" , 327, __extension__ __PRETTY_FUNCTION__); })); | |||
328 | const int x = ccv_clamp((int)(crop_x * scale_x + 0.5), 0, input->cols - slice_cols)({ typeof (0) _a = (0); typeof (input->cols - slice_cols) _b = (input->cols - slice_cols); typeof ((int)(crop_x * scale_x + 0.5)) _x = ((int)(crop_x * scale_x + 0.5)); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | |||
329 | const int y = ccv_clamp((int)(crop_y * scale_y + 0.5), 0, input->rows - slice_rows)({ typeof (0) _a = (0); typeof (input->rows - slice_rows) _b = (input->rows - slice_rows); typeof ((int)(crop_y * scale_y + 0.5)) _x = ((int)(crop_y * scale_y + 0.5)); (_x < _a) ? _a : ((_x > _b) ? _b : _x); }); | |||
330 | ccv_slice(input, (ccv_matrix_t**)&sliced, 0, y, x, slice_rows, slice_cols); | |||
331 | resize_cols = random_jitter.size.cols; | |||
332 | resize_rows = random_jitter.size.rows; | |||
333 | cropped = 1; | |||
334 | } else | |||
335 | sliced = input; | |||
336 | } else | |||
337 | sliced = input; | |||
338 | ccv_dense_matrix_t* resized = 0; | |||
339 | // Resize. | |||
340 | if (sliced->rows >= resize_rows && sliced->cols >= resize_cols) | |||
341 | { | |||
342 | // If we can fill in the whole view, we can first crop and then scale down / up. | |||
343 | ccv_resample(sliced, &resized, CCV_32F, (double)resize_rows / (double)sliced->rows, (double)resize_cols / (double)sliced->cols, CCV_INTER_AREA); | |||
344 | } else if (sliced->rows != resize_rows || sliced->cols != resize_cols) { | |||
345 | ccv_resample(sliced, &resized, CCV_32F, (double)resize_rows / (double)sliced->rows, (double)resize_cols / (double)sliced->cols, CCV_INTER_CUBIC); | |||
346 | } else { | |||
347 | ccv_shift(sliced, (ccv_matrix_t**)&resized, CCV_32F, 0, 0); // converting to 32f | |||
348 | } | |||
349 | if (sliced != input) | |||
350 | ccv_matrix_free(sliced); | |||
351 | if (random_jitter.symmetric && (sfmt_genrand_uint32(&sfmt[i]) & 1) == 0) | |||
352 | ccv_flip(resized, &resized, 0, CCV_FLIP_X); | |||
353 | _ccv_cnnp_image_manip(resized, random_jitter, &sfmt[i]); | |||
354 | // Apply normalization. Slice will introduce 0 padding, which won't be correct before normalization. | |||
355 | if (random_jitter.normalize.mean[0] != 0 || random_jitter.normalize.std[0] != 1 || | |||
356 | random_jitter.normalize.mean[1] != 0 || random_jitter.normalize.std[1] != 1 || | |||
357 | random_jitter.normalize.mean[2] != 0 || random_jitter.normalize.std[2] != 1) | |||
358 | _ccv_cnnp_normalize(resized, random_jitter.normalize.mean, random_jitter.normalize.std); | |||
359 | // If we haven't cropped in previous step (likely because we have some fill-ins due to the resize down too much). | |||
360 | // Do the crop now. | |||
361 | ccv_dense_matrix_t* patch = 0; | |||
362 | if (!cropped && need_crop) | |||
363 | { | |||
364 | ccv_slice(resized, (ccv_matrix_t**)&patch, CCV_32F, crop_y, crop_x, random_jitter.size.rows, random_jitter.size.cols); | |||
365 | ccv_matrix_free(resized); | |||
366 | } else | |||
367 | patch = resized; | |||
368 | assert(!ccv_any_nan(patch))((void) sizeof ((!ccv_any_nan(patch)) ? 1 : 0), __extension__ ({ if (!ccv_any_nan(patch)) ; else __assert_fail ("!ccv_any_nan(patch)" , "ccv_cnnp_dataframe_addons.c", 368, __extension__ __PRETTY_FUNCTION__ ); })); | |||
369 | data[i] = patch; | |||
370 | } parallel_endfor} } | |||
371 | ccfreefree(sfmt); | |||
372 | } | |||
373 | ||||
374 | int ccv_cnnp_dataframe_image_random_jitter(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const int datatype, const ccv_cnnp_random_jitter_t random_jitter, const char* name) | |||
375 | { | |||
376 | assert(datatype == CCV_32F)((void) sizeof ((datatype == CCV_32F) ? 1 : 0), __extension__ ({ if (datatype == CCV_32F) ; else __assert_fail ("datatype == CCV_32F" , "ccv_cnnp_dataframe_addons.c", 376, __extension__ __PRETTY_FUNCTION__ ); })); | |||
377 | ccv_cnnp_random_jitter_context_t* const random_jitter_context = (ccv_cnnp_random_jitter_context_t*)ccmallocmalloc(sizeof(ccv_cnnp_random_jitter_context_t)); | |||
378 | if (random_jitter.seed) | |||
379 | sfmt_init_gen_rand(&random_jitter_context->sfmt, (uint32_t)random_jitter.seed); | |||
380 | else | |||
381 | sfmt_init_gen_rand(&random_jitter_context->sfmt, ccv_nnc_stream_context_genrand_uint32(0)); | |||
382 | random_jitter_context->datatype = datatype; | |||
383 | random_jitter_context->random_jitter = random_jitter; | |||
384 | int i; | |||
385 | // The std in the random jitter should be inv_std. | |||
386 | for (i = 0; i < 3; i++) | |||
387 | random_jitter_context->random_jitter.normalize.std[i] = random_jitter_context->random_jitter.normalize.std[i] ? 1. / random_jitter_context->random_jitter.normalize.std[i] : 1; | |||
388 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_random_jitter, 0, _ccv_cnnp_image_deinit, COLUMN_ID_LIST(column_idx)(const int []){column_idx}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), random_jitter_context, (ccv_cnnp_column_data_context_deinit_f)ccfreefree, name); | |||
389 | } | |||
390 | ||||
// Context handed to _ccv_cnnp_one_hot.
typedef struct {
	int range; // Length of the one-hot vector; labels must lie in [0, range).
	int datatype; // Datatype of the generated tensor (CCV_32F or CCV_16F).
	int format; // Tensor format recorded in the generated tensor's params.
	float onval; // Value written at the label's position.
	float offval; // Value written at every other position.
	off_t structof; // Byte offset of the int label inside each input row.
} ccv_cnnp_one_hot_context_t;
399 | ||||
400 | static void _ccv_cnnp_one_hot(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
401 | { | |||
402 | ccv_cnnp_one_hot_context_t* const one_hot = (ccv_cnnp_one_hot_context_t*)context; | |||
403 | ccv_nnc_tensor_param_t params = { | |||
404 | .datatype = one_hot->datatype, | |||
405 | .type = CCV_TENSOR_CPU_MEMORY, | |||
406 | .format = one_hot->format, | |||
407 | .dim = { | |||
408 | one_hot->range, | |||
409 | }, | |||
410 | }; | |||
411 | parallel_for(i, batch_size){ int i; for ((i) = 0; (i) < (batch_size); (i)++) { { | |||
412 | int j; | |||
413 | const int label = *(const int*)((const char*)column_data[0][i] + one_hot->structof); | |||
414 | if (!data[i]) | |||
415 | data[i] = ccv_nnc_tensor_new(0, params, 0); | |||
416 | ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)data[i]; | |||
417 | assert(label >= 0 && label < one_hot->range)((void) sizeof ((label >= 0 && label < one_hot-> range) ? 1 : 0), __extension__ ({ if (label >= 0 && label < one_hot->range) ; else __assert_fail ("label >= 0 && label < one_hot->range" , "ccv_cnnp_dataframe_addons.c", 417, __extension__ __PRETTY_FUNCTION__ ); })); | |||
418 | if (tensor->info.datatype == CCV_32F) | |||
419 | for (j = 0; j < one_hot->range; j++) | |||
420 | tensor->data.f32[j] = (j == label) ? one_hot->onval : one_hot->offval; | |||
421 | else if (tensor->info.datatype == CCV_16F) | |||
422 | for (j = 0; j < one_hot->range; j++) | |||
423 | ccv_float_to_half_precision((j == label) ? &one_hot->onval : &one_hot->offval, (uint16_t*)(tensor->data.f16 + j), 1); | |||
424 | else | |||
425 | { assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_cnnp_dataframe_addons.c", 425, __extension__ __PRETTY_FUNCTION__ ); })); } | |||
426 | } parallel_endfor} } | |||
427 | } | |||
428 | ||||
429 | int ccv_cnnp_dataframe_one_hot(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof, const int range, const float onval, const float offval, const int datatype, const int format, const char* name) | |||
430 | { | |||
431 | assert(datatype == CCV_32F || datatype == CCV_16F)((void) sizeof ((datatype == CCV_32F || datatype == CCV_16F) ? 1 : 0), __extension__ ({ if (datatype == CCV_32F || datatype == CCV_16F) ; else __assert_fail ("datatype == CCV_32F || datatype == CCV_16F" , "ccv_cnnp_dataframe_addons.c", 431, __extension__ __PRETTY_FUNCTION__ ); })); | |||
432 | ccv_cnnp_one_hot_context_t* const one_hot = (ccv_cnnp_one_hot_context_t*)ccmallocmalloc(sizeof(ccv_cnnp_one_hot_context_t)); | |||
433 | one_hot->range = range; | |||
434 | one_hot->datatype = datatype; | |||
435 | one_hot->format = format; | |||
436 | one_hot->onval = onval; | |||
437 | one_hot->offval = offval; | |||
438 | one_hot->structof = structof; | |||
439 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_one_hot, 0, _ccv_cnnp_tensor_deinit, COLUMN_ID_LIST(column_idx)(const int []){column_idx}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), one_hot, (ccv_cnnp_column_data_context_deinit_f)ccfreefree, name); | |||
440 | } | |||
441 | ||||
// Context handed to _ccv_cnnp_copy_scalar.
typedef struct {
	int from_dt; // Datatype of the scalar stored in the input row (CCV_32S, CCV_32F or CCV_16F).
	int to_dt; // Datatype of the generated one-element tensor (CCV_32F or CCV_16F).
	int format; // Tensor format recorded in the generated tensor's params.
	off_t structof; // Byte offset of the scalar inside each input row.
} ccv_cnnp_copy_scalar_context_t;
448 | ||||
449 | static void _ccv_cnnp_copy_scalar(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
450 | { | |||
451 | ccv_cnnp_copy_scalar_context_t* const copy_scalar = (ccv_cnnp_copy_scalar_context_t*)context; | |||
452 | ccv_nnc_tensor_param_t params = { | |||
453 | .datatype = copy_scalar->to_dt, | |||
454 | .type = CCV_TENSOR_CPU_MEMORY, | |||
455 | .format = copy_scalar->format, | |||
456 | .dim = {1}, | |||
457 | }; | |||
458 | parallel_for(i, batch_size){ int i; for ((i) = 0; (i) < (batch_size); (i)++) { { | |||
| ||||
459 | const ccv_numeric_data_t value = { | |||
460 | .u8 = (unsigned char *)((const char*)column_data[0][i] + copy_scalar->structof), | |||
461 | }; | |||
462 | if (!data[i]) | |||
463 | data[i] = ccv_nnc_tensor_new(0, params, 0); | |||
464 | ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)data[i]; | |||
465 | if (copy_scalar->from_dt == CCV_32S) | |||
466 | { | |||
467 | if (tensor->info.datatype == CCV_32F) | |||
468 | tensor->data.f32[0] = value.i32[0]; | |||
469 | else if (tensor->info.datatype == CCV_16F) { | |||
470 | float fval = value.i32[0]; | |||
471 | ccv_float_to_half_precision(&fval, (uint16_t*)tensor->data.f16, 1); | |||
472 | } | |||
473 | } else if (copy_scalar->from_dt == CCV_32F) { | |||
474 | if (tensor->info.datatype == CCV_32F) | |||
475 | tensor->data.f32[0] = value.f32[0]; | |||
| ||||
476 | else if (tensor->info.datatype == CCV_16F) | |||
477 | ccv_float_to_half_precision(value.f32, (uint16_t*)tensor->data.f16, 1); | |||
478 | } else if (copy_scalar->from_dt == CCV_16F) { | |||
479 | if (tensor->info.datatype == CCV_32F) | |||
480 | ccv_half_precision_to_float((uint16_t*)value.f16, tensor->data.f32, 1); | |||
481 | else if (tensor->info.datatype == CCV_16F) | |||
482 | tensor->data.f16[0] = value.f16[0]; | |||
483 | } | |||
484 | } parallel_endfor} } | |||
485 | } | |||
486 | ||||
487 | CCV_WARN_UNUSED(int)int __attribute__((warn_unused_result)) ccv_cnnp_dataframe_copy_scalar(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof, const int from_dt, const int to_dt, const int format, const char* name) | |||
488 | { | |||
489 | assert(from_dt == CCV_32S || from_dt == CCV_32F || from_dt == CCV_16F)((void) sizeof ((from_dt == CCV_32S || from_dt == CCV_32F || from_dt == CCV_16F) ? 1 : 0), __extension__ ({ if (from_dt == CCV_32S || from_dt == CCV_32F || from_dt == CCV_16F) ; else __assert_fail ("from_dt == CCV_32S || from_dt == CCV_32F || from_dt == CCV_16F" , "ccv_cnnp_dataframe_addons.c", 489, __extension__ __PRETTY_FUNCTION__ ); })); | |||
490 | assert(to_dt == CCV_32F || to_dt == CCV_16F)((void) sizeof ((to_dt == CCV_32F || to_dt == CCV_16F) ? 1 : 0 ), __extension__ ({ if (to_dt == CCV_32F || to_dt == CCV_16F) ; else __assert_fail ("to_dt == CCV_32F || to_dt == CCV_16F" , "ccv_cnnp_dataframe_addons.c", 490, __extension__ __PRETTY_FUNCTION__ ); })); | |||
491 | ccv_cnnp_copy_scalar_context_t* const copy_scalar = (ccv_cnnp_copy_scalar_context_t*)ccmallocmalloc(sizeof(ccv_cnnp_copy_scalar_context_t)); | |||
492 | copy_scalar->from_dt = from_dt; | |||
493 | copy_scalar->to_dt = to_dt; | |||
494 | copy_scalar->format = format; | |||
495 | copy_scalar->structof = structof; | |||
496 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_copy_scalar, 0, _ccv_cnnp_tensor_deinit, COLUMN_ID_LIST(column_idx)(const int []){column_idx}, (1 +1 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 + 0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 +0 -1), copy_scalar, (ccv_cnnp_column_data_context_deinit_f)ccfreefree, name); | |||
497 | } | |||
498 | ||||
499 | // MARK - Matrix of Ones | |||
500 | ||||
501 | typedef struct { | |||
502 | ccv_cnnp_dataframe_tuple_t tuple; | |||
503 | int variable_size; | |||
504 | int max_length; | |||
505 | } ccv_cnnp_one_squared_context_t; | |||
506 | ||||
507 | static void _ccv_cnnp_one_squared(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
508 | { | |||
509 | ccv_cnnp_one_squared_context_t* const ones = (ccv_cnnp_one_squared_context_t*)context; | |||
510 | assert(ones->tuple.size == column_size)((void) sizeof ((ones->tuple.size == column_size) ? 1 : 0) , __extension__ ({ if (ones->tuple.size == column_size) ; else __assert_fail ("ones->tuple.size == column_size", "ccv_cnnp_dataframe_addons.c" , 510, __extension__ __PRETTY_FUNCTION__); })); | |||
511 | const int max_length = ones->max_length; | |||
512 | if (ones->variable_size) | |||
513 | { | |||
514 | parallel_for(i, batch_size){ int i; for ((i) = 0; (i) < (batch_size); (i)++) { { | |||
515 | ccv_nnc_tensor_t* const first_seq = (ccv_nnc_tensor_t*)column_data[0][i]; | |||
516 | assert(first_seq->info.datatype == CCV_32S)((void) sizeof ((first_seq->info.datatype == CCV_32S) ? 1 : 0), __extension__ ({ if (first_seq->info.datatype == CCV_32S ) ; else __assert_fail ("first_seq->info.datatype == CCV_32S" , "ccv_cnnp_dataframe_addons.c", 516, __extension__ __PRETTY_FUNCTION__ ); })); | |||
517 | const int first_len = ccv_nnc_tensor_count(first_seq->info); | |||
518 | ccv_nnc_tensor_t** outputs = data[i]; | |||
519 | if (!outputs) | |||
520 | outputs = (ccv_nnc_tensor_t**)(data[i] = cccalloccalloc(column_size, sizeof(ccv_nnc_tensor_t*))); | |||
521 | int k; | |||
522 | for (k = 0; k < column_size; k++) | |||
523 | if (!outputs[k]) | |||
524 | outputs[k] = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, first_len, max_length, max_length)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_ANY) | CCV_TENSOR_CPU_MEMORY ,.format=CCV_TENSOR_FORMAT_NHWC,.datatype=CCV_32S,.dim={first_len , max_length, max_length}}), 0); | |||
525 | int max_len = 0; | |||
526 | for (k = 0; k < column_size; k++) | |||
527 | { | |||
528 | ccv_nnc_tensor_t* const seq = (ccv_nnc_tensor_t*)column_data[k][i]; | |||
529 | assert(seq->info.datatype == CCV_32S)((void) sizeof ((seq->info.datatype == CCV_32S) ? 1 : 0), __extension__ ({ if (seq->info.datatype == CCV_32S) ; else __assert_fail ("seq->info.datatype == CCV_32S", "ccv_cnnp_dataframe_addons.c" , 529, __extension__ __PRETTY_FUNCTION__); })); | |||
530 | const int len = ccv_nnc_tensor_count(seq->info); | |||
531 | assert(len == first_len)((void) sizeof ((len == first_len) ? 1 : 0), __extension__ ({ if (len == first_len) ; else __assert_fail ("len == first_len" , "ccv_cnnp_dataframe_addons.c", 531, __extension__ __PRETTY_FUNCTION__ ); })); | |||
532 | const int* const ia = seq->data.i32; | |||
533 | int l; | |||
534 | for (l = 0; l < len; l++) | |||
535 | max_len = ccv_max(max_len, ia[l])({ typeof (max_len) _a = (max_len); typeof (ia[l]) _b = (ia[l ]); (_a > _b) ? _a : _b; }); | |||
536 | } | |||
537 | assert(max_len <= max_length)((void) sizeof ((max_len <= max_length) ? 1 : 0), __extension__ ({ if (max_len <= max_length) ; else __assert_fail ("max_len <= max_length" , "ccv_cnnp_dataframe_addons.c", 537, __extension__ __PRETTY_FUNCTION__ ); })); | |||
538 | parallel_for(c, column_size){ int c; for ((c) = 0; (c) < (column_size); (c)++) { { | |||
539 | ccv_nnc_tensor_t* const seq = (ccv_nnc_tensor_t*)column_data[c][i]; | |||
540 | assert(seq->info.datatype == CCV_32S)((void) sizeof ((seq->info.datatype == CCV_32S) ? 1 : 0), __extension__ ({ if (seq->info.datatype == CCV_32S) ; else __assert_fail ("seq->info.datatype == CCV_32S", "ccv_cnnp_dataframe_addons.c" , 540, __extension__ __PRETTY_FUNCTION__); })); | |||
541 | const int len = ccv_nnc_tensor_count(seq->info); | |||
542 | assert(len == first_len)((void) sizeof ((len == first_len) ? 1 : 0), __extension__ ({ if (len == first_len) ; else __assert_fail ("len == first_len" , "ccv_cnnp_dataframe_addons.c", 542, __extension__ __PRETTY_FUNCTION__ ); })); | |||
543 | ccv_nnc_tensor_t* tensor = outputs[c]; | |||
544 | tensor = ccv_nnc_tensor_resize(tensor, CPU_TENSOR_NHWC(32S, len, max_len, max_len)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_ANY) | CCV_TENSOR_CPU_MEMORY ,.format=CCV_TENSOR_FORMAT_NHWC,.datatype=CCV_32S,.dim={len, max_len , max_len}})); | |||
545 | assert(outputs[c] == tensor)((void) sizeof ((outputs[c] == tensor) ? 1 : 0), __extension__ ({ if (outputs[c] == tensor) ; else __assert_fail ("outputs[c] == tensor" , "ccv_cnnp_dataframe_addons.c", 545, __extension__ __PRETTY_FUNCTION__ ); })); // Since we allocated with max_length, this cannot be reallocated. | |||
546 | const int* const ia = seq->data.i32; | |||
547 | parallel_for(j, len){ int j; for ((j) = 0; (j) < (len); (j)++) { { | |||
548 | int x, y; | |||
549 | int seq_len = ia[j]; | |||
550 | int* ib = tensor->data.i32 + j * max_len * max_len; | |||
551 | for (y = 0; y < seq_len; y++) | |||
552 | { | |||
553 | for (x = 0; x < seq_len; x++) | |||
554 | ib[x] = 1; | |||
555 | for (x = seq_len; x < max_len; x++) | |||
556 | ib[x] = 0; | |||
557 | ib += max_len; | |||
558 | } | |||
559 | if (seq_len < max_len) | |||
560 | memset(ib, 0, sizeof(int) * max_len * (max_len - seq_len)); | |||
561 | } parallel_endfor} } | |||
562 | } parallel_endfor} } | |||
563 | } parallel_endfor} } | |||
564 | } else { | |||
565 | parallel_for(i, batch_size){ int i; for ((i) = 0; (i) < (batch_size); (i)++) { { | |||
566 | ccv_nnc_tensor_t** outputs = data[i]; | |||
567 | ccv_nnc_tensor_t* const first_seq = (ccv_nnc_tensor_t*)column_data[0][i]; | |||
568 | assert(first_seq->info.datatype == CCV_32S)((void) sizeof ((first_seq->info.datatype == CCV_32S) ? 1 : 0), __extension__ ({ if (first_seq->info.datatype == CCV_32S ) ; else __assert_fail ("first_seq->info.datatype == CCV_32S" , "ccv_cnnp_dataframe_addons.c", 568, __extension__ __PRETTY_FUNCTION__ ); })); | |||
569 | const int first_len = ccv_nnc_tensor_count(first_seq->info); | |||
570 | if (!outputs) | |||
571 | outputs = (ccv_nnc_tensor_t**)(data[i] = cccalloccalloc(column_size, sizeof(ccv_nnc_tensor_t*))); | |||
572 | int k; | |||
573 | for (k = 0; k < column_size; k++) | |||
574 | if (!outputs[k]) | |||
575 | outputs[k] = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, first_len, max_length, max_length)((ccv_nnc_tensor_param_t){.type=(CCV_COMPUTE_DEVICE_ANY) | CCV_TENSOR_CPU_MEMORY ,.format=CCV_TENSOR_FORMAT_NHWC,.datatype=CCV_32S,.dim={first_len , max_length, max_length}}), 0); | |||
576 | parallel_for(c, column_size){ int c; for ((c) = 0; (c) < (column_size); (c)++) { { | |||
577 | ccv_nnc_tensor_t* const tensor = outputs[c]; | |||
578 | ccv_nnc_tensor_t* const seq = (ccv_nnc_tensor_t*)column_data[c][i]; | |||
579 | assert(seq->info.datatype == CCV_32S)((void) sizeof ((seq->info.datatype == CCV_32S) ? 1 : 0), __extension__ ({ if (seq->info.datatype == CCV_32S) ; else __assert_fail ("seq->info.datatype == CCV_32S", "ccv_cnnp_dataframe_addons.c" , 579, __extension__ __PRETTY_FUNCTION__); })); | |||
580 | const int len = ccv_nnc_tensor_count(seq->info); | |||
581 | assert(len == first_len)((void) sizeof ((len == first_len) ? 1 : 0), __extension__ ({ if (len == first_len) ; else __assert_fail ("len == first_len" , "ccv_cnnp_dataframe_addons.c", 581, __extension__ __PRETTY_FUNCTION__ ); })); | |||
582 | const int* const ia = seq->data.i32; | |||
583 | parallel_for(j, len){ int j; for ((j) = 0; (j) < (len); (j)++) { { | |||
584 | int x, y; | |||
585 | int seq_len = ia[j]; | |||
586 | int* ib = tensor->data.i32 + j * max_length * max_length; | |||
587 | for (y = 0; y < seq_len; y++) | |||
588 | { | |||
589 | for (x = 0; x < seq_len; x++) | |||
590 | ib[x] = 1; | |||
591 | for (x = seq_len; x < max_length; x++) | |||
592 | ib[x] = 0; | |||
593 | ib += max_length; | |||
594 | } | |||
595 | if (seq_len < max_length) | |||
596 | memset(ib, 0, sizeof(int) * max_length * (max_length - seq_len)); | |||
597 | } parallel_endfor} } | |||
598 | } parallel_endfor} } | |||
599 | } parallel_endfor} } | |||
600 | } | |||
601 | } | |||
602 | ||||
603 | CCV_WARN_UNUSED(int)int __attribute__((warn_unused_result)) ccv_cnnp_dataframe_one_squared(ccv_cnnp_dataframe_t* const dataframe, const int* const column_idxs, const int column_idx_size, const int variable_size, const int max_length, const char* name) | |||
604 | { | |||
605 | assert(max_length > 0)((void) sizeof ((max_length > 0) ? 1 : 0), __extension__ ( { if (max_length > 0) ; else __assert_fail ("max_length > 0" , "ccv_cnnp_dataframe_addons.c", 605, __extension__ __PRETTY_FUNCTION__ ); })); | |||
606 | assert(variable_size == 0 || variable_size == 1)((void) sizeof ((variable_size == 0 || variable_size == 1) ? 1 : 0), __extension__ ({ if (variable_size == 0 || variable_size == 1) ; else __assert_fail ("variable_size == 0 || variable_size == 1" , "ccv_cnnp_dataframe_addons.c", 606, __extension__ __PRETTY_FUNCTION__ ); })); | |||
607 | ccv_cnnp_one_squared_context_t* const ones = (ccv_cnnp_one_squared_context_t*)ccmallocmalloc(sizeof(ccv_cnnp_one_squared_context_t)); | |||
608 | ones->tuple.size = column_idx_size; | |||
609 | ones->variable_size = variable_size; | |||
610 | ones->max_length = max_length; | |||
611 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_one_squared, 0, _ccv_cnnp_tensor_list_deinit, column_idxs, column_idx_size, ones, (ccv_cnnp_column_data_context_deinit_f)ccfreefree, name); | |||
612 | } | |||
613 | ||||
614 | // MARK - Truncate Matrix | |||
615 | ||||
616 | static void _ccv_cnnp_truncate(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
617 | { | |||
618 | assert(column_size >= 2)((void) sizeof ((column_size >= 2) ? 1 : 0), __extension__ ({ if (column_size >= 2) ; else __assert_fail ("column_size >= 2" , "ccv_cnnp_dataframe_addons.c", 618, __extension__ __PRETTY_FUNCTION__ ); })); | |||
619 | assert(column_size % 2 == 0)((void) sizeof ((column_size % 2 == 0) ? 1 : 0), __extension__ ({ if (column_size % 2 == 0) ; else __assert_fail ("column_size % 2 == 0" , "ccv_cnnp_dataframe_addons.c", 619, __extension__ __PRETTY_FUNCTION__ ); })); | |||
620 | const int tuple_size = column_size / 2; | |||
621 | ccv_cnnp_dataframe_tuple_t* const tuple = (ccv_cnnp_dataframe_tuple_t*)context; | |||
622 | assert(tuple->size == tuple_size)((void) sizeof ((tuple->size == tuple_size) ? 1 : 0), __extension__ ({ if (tuple->size == tuple_size) ; else __assert_fail ("tuple->size == tuple_size" , "ccv_cnnp_dataframe_addons.c", 622, __extension__ __PRETTY_FUNCTION__ ); })); | |||
623 | parallel_for(i, batch_size){ int i; for ((i) = 0; (i) < (batch_size); (i)++) { { | |||
624 | int k; | |||
625 | ccv_nnc_tensor_t* const first_seq = (ccv_nnc_tensor_t*)column_data[tuple_size][i]; | |||
626 | assert(first_seq->info.datatype == CCV_32S)((void) sizeof ((first_seq->info.datatype == CCV_32S) ? 1 : 0), __extension__ ({ if (first_seq->info.datatype == CCV_32S ) ; else __assert_fail ("first_seq->info.datatype == CCV_32S" , "ccv_cnnp_dataframe_addons.c", 626, __extension__ __PRETTY_FUNCTION__ ); })); | |||
627 | const int first_len = ccv_nnc_tensor_count(first_seq->info); | |||
628 | int max_len = 0; | |||
629 | for (k = 0; k < tuple_size; k++) | |||
630 | { | |||
631 | ccv_nnc_tensor_t* const seq = (ccv_nnc_tensor_t*)column_data[tuple_size + k][i]; | |||
632 | assert(seq->info.datatype == CCV_32S)((void) sizeof ((seq->info.datatype == CCV_32S) ? 1 : 0), __extension__ ({ if (seq->info.datatype == CCV_32S) ; else __assert_fail ("seq->info.datatype == CCV_32S", "ccv_cnnp_dataframe_addons.c" , 632, __extension__ __PRETTY_FUNCTION__); })); | |||
633 | const int len = ccv_nnc_tensor_count(seq->info); | |||
634 | assert(len == first_len)((void) sizeof ((len == first_len) ? 1 : 0), __extension__ ({ if (len == first_len) ; else __assert_fail ("len == first_len" , "ccv_cnnp_dataframe_addons.c", 634, __extension__ __PRETTY_FUNCTION__ ); })); | |||
635 | const int* const ia = seq->data.i32; | |||
636 | int l; | |||
637 | for (l = 0; l < len; l++) | |||
638 | max_len = ccv_max(max_len, ia[l])({ typeof (max_len) _a = (max_len); typeof (ia[l]) _b = (ia[l ]); (_a > _b) ? _a : _b; }); | |||
639 | } | |||
640 | ccv_nnc_tensor_t* const first_inp = (ccv_nnc_tensor_t*)column_data[0][i]; | |||
641 | ccv_nnc_tensor_param_t first_params = first_inp->info; | |||
642 | assert(first_params.dim[0] == first_len)((void) sizeof ((first_params.dim[0] == first_len) ? 1 : 0), __extension__ ({ if (first_params.dim[0] == first_len) ; else __assert_fail ("first_params.dim[0] == first_len", "ccv_cnnp_dataframe_addons.c" , 642, __extension__ __PRETTY_FUNCTION__); })); | |||
643 | assert(max_len <= first_params.dim[1])((void) sizeof ((max_len <= first_params.dim[1]) ? 1 : 0), __extension__ ({ if (max_len <= first_params.dim[1]) ; else __assert_fail ("max_len <= first_params.dim[1]", "ccv_cnnp_dataframe_addons.c" , 643, __extension__ __PRETTY_FUNCTION__); })); | |||
644 | first_params.dim[1] = max_len; | |||
645 | ccv_nnc_tensor_t** outputs = data[i]; | |||
646 | if (!outputs) | |||
647 | outputs = (ccv_nnc_tensor_t**)(data[i] = cccalloccalloc(tuple_size, sizeof(ccv_nnc_tensor_t*))); | |||
648 | for (k = 0; k < tuple_size; k++) | |||
649 | { | |||
650 | if (!outputs[k]) | |||
651 | outputs[k] = ccv_nnc_tensor_new(0, first_params, 0); | |||
652 | else | |||
653 | outputs[k] = ccv_nnc_tensor_resize(outputs[k], first_params); | |||
654 | } | |||
655 | parallel_for(c, tuple_size){ int c; for ((c) = 0; (c) < (tuple_size); (c)++) { { | |||
656 | ccv_nnc_tensor_t* const seq = (ccv_nnc_tensor_t*)column_data[tuple_size + c][i]; | |||
657 | assert(seq->info.datatype == CCV_32S)((void) sizeof ((seq->info.datatype == CCV_32S) ? 1 : 0), __extension__ ({ if (seq->info.datatype == CCV_32S) ; else __assert_fail ("seq->info.datatype == CCV_32S", "ccv_cnnp_dataframe_addons.c" , 657, __extension__ __PRETTY_FUNCTION__); })); | |||
658 | const int len = ccv_nnc_tensor_count(seq->info); | |||
659 | ccv_nnc_tensor_t* const inp = (ccv_nnc_tensor_t*)column_data[c][i]; | |||
660 | ccv_nnc_tensor_param_t params = inp->info; | |||
661 | assert(params.dim[0] == len)((void) sizeof ((params.dim[0] == len) ? 1 : 0), __extension__ ({ if (params.dim[0] == len) ; else __assert_fail ("params.dim[0] == len" , "ccv_cnnp_dataframe_addons.c", 661, __extension__ __PRETTY_FUNCTION__ ); })); | |||
662 | assert(first_len == len)((void) sizeof ((first_len == len) ? 1 : 0), __extension__ ({ if (first_len == len) ; else __assert_fail ("first_len == len" , "ccv_cnnp_dataframe_addons.c", 662, __extension__ __PRETTY_FUNCTION__ ); })); | |||
663 | assert(max_len <= params.dim[1])((void) sizeof ((max_len <= params.dim[1]) ? 1 : 0), __extension__ ({ if (max_len <= params.dim[1]) ; else __assert_fail ("max_len <= params.dim[1]" , "ccv_cnnp_dataframe_addons.c", 663, __extension__ __PRETTY_FUNCTION__ ); })); | |||
664 | assert(params.dim[2] == 0)((void) sizeof ((params.dim[2] == 0) ? 1 : 0), __extension__ ( { if (params.dim[2] == 0) ; else __assert_fail ("params.dim[2] == 0" , "ccv_cnnp_dataframe_addons.c", 664, __extension__ __PRETTY_FUNCTION__ ); })); | |||
665 | const int ori_len = params.dim[1]; | |||
666 | ccv_nnc_tensor_t* const out = outputs[c]; | |||
667 | uint8_t* const ua = inp->data.u8; | |||
668 | uint8_t* const ub = out->data.u8; | |||
669 | size_t la = CCV_GET_DATA_TYPE_SIZE(params.datatype)_ccv_get_data_type_size[((params.datatype) & 0xFF000) >> 12] * ori_len; | |||
670 | size_t lb = CCV_GET_DATA_TYPE_SIZE(params.datatype)_ccv_get_data_type_size[((params.datatype) & 0xFF000) >> 12] * max_len; | |||
671 | parallel_for(j, len){ int j; for ((j) = 0; (j) < (len); (j)++) { { | |||
672 | memcpy(ub + lb * j, ua + la * j, lb); | |||
673 | } parallel_endfor} } | |||
674 | } parallel_endfor} } | |||
675 | } parallel_endfor} } | |||
676 | } | |||
677 | ||||
678 | int ccv_cnnp_dataframe_truncate(ccv_cnnp_dataframe_t* const dataframe, const int* const vec_idxs, const int vec_idx_size, const int* const len_idxs, const int len_idx_size, const char* name) | |||
679 | { | |||
680 | const int total_idx_size = vec_idx_size + len_idx_size; | |||
681 | assert(total_idx_size > 0)((void) sizeof ((total_idx_size > 0) ? 1 : 0), __extension__ ({ if (total_idx_size > 0) ; else __assert_fail ("total_idx_size > 0" , "ccv_cnnp_dataframe_addons.c", 681, __extension__ __PRETTY_FUNCTION__ ); })); | |||
682 | assert(vec_idx_size == len_idx_size)((void) sizeof ((vec_idx_size == len_idx_size) ? 1 : 0), __extension__ ({ if (vec_idx_size == len_idx_size) ; else __assert_fail ("vec_idx_size == len_idx_size" , "ccv_cnnp_dataframe_addons.c", 682, __extension__ __PRETTY_FUNCTION__ ); })); | |||
683 | int total_idxs[total_idx_size]; | |||
684 | memcpy(total_idxs, vec_idxs, sizeof(int) * vec_idx_size); | |||
685 | memcpy(total_idxs + vec_idx_size, len_idxs, sizeof(int) * len_idx_size); | |||
686 | ccv_cnnp_dataframe_tuple_t* const tuple = (ccv_cnnp_dataframe_tuple_t*)ccmallocmalloc(sizeof(ccv_cnnp_dataframe_tuple_t)); | |||
687 | tuple->size = vec_idx_size; | |||
688 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_truncate, 0, _ccv_cnnp_tensor_list_deinit, total_idxs, total_idx_size, tuple, (ccv_cnnp_column_data_context_deinit_f)ccfreefree, name); | |||
689 | } | |||
690 | ||||
691 | // MARK - Batching | |||
692 | ||||
693 | typedef struct { | |||
694 | ccv_cnnp_dataframe_tuple_t tuple; | |||
695 | int format; | |||
696 | int batch_count; | |||
697 | int group_count; | |||
698 | } ccv_cnnp_batch_context_t; | |||
699 | ||||
700 | static void _ccv_cnnp_combine_new(void* const* const input_data, const int input_size, void** const output_data, void* const context, ccv_nnc_stream_context_t* const stream_context) | |||
701 | { | |||
702 | ccv_cnnp_batch_context_t* const batch = (ccv_cnnp_batch_context_t*)context; | |||
703 | const int output_tuple_size = batch->tuple.size; | |||
704 | const int batch_count = batch->batch_count; | |||
705 | const int group_count = batch->group_count; | |||
706 | const int input_tuple_size = output_tuple_size / group_count; | |||
707 | int i, j, k; | |||
708 | assert(input_size > 0)((void) sizeof ((input_size > 0) ? 1 : 0), __extension__ ( { if (input_size > 0) ; else __assert_fail ("input_size > 0" , "ccv_cnnp_dataframe_addons.c", 708, __extension__ __PRETTY_FUNCTION__ ); })); | |||
709 | if (!output_data[0]) | |||
710 | { | |||
711 | ccv_nnc_tensor_t** const inputs = (ccv_nnc_tensor_t**)input_data[0]; | |||
712 | ccv_nnc_tensor_t** const tensors = (ccv_nnc_tensor_t**)(output_data[0] = ccmallocmalloc(sizeof(ccv_nnc_tensor_t*) * output_tuple_size)); | |||
713 | for (i = 0; i < group_count; i++) | |||
714 | for (j = 0; j < input_tuple_size; j++) | |||
715 | { | |||
716 | ccv_nnc_tensor_param_t params = inputs[j]->info; | |||
717 | assert(params.datatype == CCV_32F || params.datatype == CCV_32S || params.datatype == CCV_16F)((void) sizeof ((params.datatype == CCV_32F || params.datatype == CCV_32S || params.datatype == CCV_16F) ? 1 : 0), __extension__ ({ if (params.datatype == CCV_32F || params.datatype == CCV_32S || params.datatype == CCV_16F) ; else __assert_fail ("params.datatype == CCV_32F || params.datatype == CCV_32S || params.datatype == CCV_16F" , "ccv_cnnp_dataframe_addons.c", 717, __extension__ __PRETTY_FUNCTION__ ); })); // Only support 32 bit float yet. | |||
718 | assert(params.format == CCV_TENSOR_FORMAT_NHWC || params.format == CCV_TENSOR_FORMAT_NCHW)((void) sizeof ((params.format == CCV_TENSOR_FORMAT_NHWC || params .format == CCV_TENSOR_FORMAT_NCHW) ? 1 : 0), __extension__ ({ if (params.format == CCV_TENSOR_FORMAT_NHWC || params.format == CCV_TENSOR_FORMAT_NCHW) ; else __assert_fail ("params.format == CCV_TENSOR_FORMAT_NHWC || params.format == CCV_TENSOR_FORMAT_NCHW" , "ccv_cnnp_dataframe_addons.c", 718, __extension__ __PRETTY_FUNCTION__ ); })); | |||
719 | params.format = batch->format; | |||
720 | // Special-case for dim count is 3 and 1, in these two cases, the N is not provided. | |||
721 | if (batch->format == inputs[j]->info.format) | |||
722 | { | |||
723 | const int nd = ccv_nnc_tensor_nd(params.dim); | |||
724 | memset(params.dim, 0, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
725 | memcpy(params.dim + 1, inputs[j]->info.dim, sizeof(int) * nd); | |||
726 | } else { | |||
727 | const int nd = ccv_nnc_tensor_nd(params.dim); | |||
728 | if (nd < 3) | |||
729 | { | |||
730 | memset(params.dim, 0, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
731 | memcpy(params.dim + 1, inputs[j]->info.dim, sizeof(int) * nd); | |||
732 | } else if (nd >= 3) { | |||
733 | memset(params.dim, 0, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC(12)); | |||
734 | const int hw = ccv_nnc_tensor_hw(inputs[j]->info, nd); | |||
735 | if (batch->format == CCV_TENSOR_FORMAT_NCHW) | |||
736 | { | |||
737 | params.dim[1] = ccv_nnc_tensor_get_c(inputs[j]->info); | |||
738 | for (k = 0; k < CCV_NNC_MAX_DIM(2); k++) | |||
739 | params.dim[k + 2] = inputs[j]->info.dim[k + hw]; | |||
740 | } else { | |||
741 | params.dim[CCV_NNC_MAX_DIM(2) + 1] = ccv_nnc_tensor_get_c(inputs[j]->info); | |||
742 | for (k = 0; k < CCV_NNC_MAX_DIM(2); k++) | |||
743 | params.dim[k + 1] = inputs[j]->info.dim[k + hw]; | |||
744 | } | |||
745 | } | |||
746 | } | |||
747 | params.dim[0] = batch_count; // Set the batch count now. | |||
748 | tensors[i * input_tuple_size + j] = ccv_nnc_tensor_new(0, params, 0); | |||
749 | } | |||
750 | } | |||
751 | for (i = 0; i < group_count; i++) | |||
752 | for (j = 0; j < input_tuple_size; j++) | |||
753 | { | |||
754 | ccv_nnc_tensor_t* const output = ((ccv_nnc_tensor_t**)output_data[0])[i * input_tuple_size + j]; | |||
755 | parallel_for(k, batch_count){ int k; for ((k) = 0; (k) < (batch_count); (k)++) { { | |||
756 | ccv_nnc_tensor_t* const input = ((ccv_nnc_tensor_t**)input_data[(k + i * batch_count) % input_size])[j]; | |||
757 | const size_t tensor_count = ccv_nnc_tensor_count(input->info); | |||
758 | if (input->info.datatype == CCV_32F) | |||
759 | { | |||
760 | float* const ap = input->data.f32; | |||
761 | float* const bp = output->data.f32 + k * tensor_count; | |||
762 | if (input->info.format == output->info.format) | |||
763 | memcpy(bp, ap, sizeof(float) * tensor_count); | |||
764 | else { | |||
765 | // Do a simple format conversion. | |||
766 | const int c = ccv_nnc_tensor_get_c(input->info); | |||
767 | assert(c > 0)((void) sizeof ((c > 0) ? 1 : 0), __extension__ ({ if (c > 0) ; else __assert_fail ("c > 0", "ccv_cnnp_dataframe_addons.c" , 767, __extension__ __PRETTY_FUNCTION__); })); | |||
768 | const size_t hw_count = tensor_count / c; | |||
769 | size_t x; | |||
770 | int y; | |||
771 | if (input->info.format == CCV_TENSOR_FORMAT_NHWC && output->info.format == CCV_TENSOR_FORMAT_NCHW) | |||
772 | for (x = 0; x < hw_count; x++) | |||
773 | for (y = 0; y < c; y++) | |||
774 | bp[y * hw_count + x] = ap[x * c + y]; | |||
775 | else if (input->info.format == CCV_TENSOR_FORMAT_NCHW && output->info.format == CCV_TENSOR_FORMAT_NHWC) | |||
776 | for (x = 0; x < hw_count; x++) | |||
777 | for (y = 0; y < c; y++) | |||
778 | bp[x * c + y] = ap[y * hw_count + x]; | |||
779 | } | |||
780 | } else if (input->info.datatype == CCV_32S) { | |||
781 | int* const ap = input->data.i32; | |||
782 | int* const bp = output->data.i32 + k * tensor_count; | |||
783 | if (input->info.format == output->info.format) | |||
784 | memcpy(bp, ap, sizeof(int) * tensor_count); | |||
785 | else { | |||
786 | // Do a simple format conversion. | |||
787 | const int c = ccv_nnc_tensor_get_c(input->info); | |||
788 | assert(c > 0)((void) sizeof ((c > 0) ? 1 : 0), __extension__ ({ if (c > 0) ; else __assert_fail ("c > 0", "ccv_cnnp_dataframe_addons.c" , 788, __extension__ __PRETTY_FUNCTION__); })); | |||
789 | const size_t hw_count = tensor_count / c; | |||
790 | size_t x; | |||
791 | int y; | |||
792 | if (input->info.format == CCV_TENSOR_FORMAT_NHWC && output->info.format == CCV_TENSOR_FORMAT_NCHW) | |||
793 | for (x = 0; x < hw_count; x++) | |||
794 | for (y = 0; y < c; y++) | |||
795 | bp[y * hw_count + x] = ap[x * c + y]; | |||
796 | else if (input->info.format == CCV_TENSOR_FORMAT_NCHW && output->info.format == CCV_TENSOR_FORMAT_NHWC) | |||
797 | for (x = 0; x < hw_count; x++) | |||
798 | for (y = 0; y < c; y++) | |||
799 | bp[x * c + y] = ap[y * hw_count + x]; | |||
800 | } | |||
801 | } else if (input->info.datatype == CCV_16F) { | |||
802 | ccv_float16_t* const ap = input->data.f16; | |||
803 | ccv_float16_t* const bp = output->data.f16 + k * tensor_count; | |||
804 | if (input->info.format == output->info.format) | |||
805 | memcpy(bp, ap, sizeof(ccv_float16_t) * tensor_count); | |||
806 | else { | |||
807 | // Do a simple format conversion. | |||
808 | const int c = ccv_nnc_tensor_get_c(input->info); | |||
809 | assert(c > 0)((void) sizeof ((c > 0) ? 1 : 0), __extension__ ({ if (c > 0) ; else __assert_fail ("c > 0", "ccv_cnnp_dataframe_addons.c" , 809, __extension__ __PRETTY_FUNCTION__); })); | |||
810 | const size_t hw_count = tensor_count / c; | |||
811 | size_t x; | |||
812 | int y; | |||
813 | if (input->info.format == CCV_TENSOR_FORMAT_NHWC && output->info.format == CCV_TENSOR_FORMAT_NCHW) | |||
814 | for (x = 0; x < hw_count; x++) | |||
815 | for (y = 0; y < c; y++) | |||
816 | bp[y * hw_count + x] = ap[x * c + y]; | |||
817 | else if (input->info.format == CCV_TENSOR_FORMAT_NCHW && output->info.format == CCV_TENSOR_FORMAT_NHWC) | |||
818 | for (x = 0; x < hw_count; x++) | |||
819 | for (y = 0; y < c; y++) | |||
820 | bp[x * c + y] = ap[y * hw_count + x]; | |||
821 | } | |||
822 | } else { | |||
823 | assert(0)((void) sizeof ((0) ? 1 : 0), __extension__ ({ if (0) ; else __assert_fail ("0", "ccv_cnnp_dataframe_addons.c", 823, __extension__ __PRETTY_FUNCTION__ ); })); | |||
824 | } | |||
825 | } parallel_endfor} } | |||
826 | } | |||
827 | } | |||
828 | ||||
829 | static void _ccv_cnnp_combine_deinit(void* const self, void* const context) | |||
830 | { | |||
831 | ccv_cnnp_batch_context_t* const batch = (ccv_cnnp_batch_context_t*)context; | |||
832 | ccv_nnc_tensor_t** const tensors = (ccv_nnc_tensor_t**)self; | |||
833 | const int size = batch->tuple.size; | |||
834 | int i; | |||
835 | for (i = 0; i < size; i++) | |||
836 | ccv_nnc_tensor_free(tensors[i]); | |||
837 | ccfreefree(tensors); | |||
838 | } | |||
839 | ||||
840 | ccv_cnnp_dataframe_t* ccv_cnnp_dataframe_combine_new(ccv_cnnp_dataframe_t* const dataframe, const int* const column_idxs, const int column_idx_size, const int batch_count, const int group_count, const int format) | |||
841 | { | |||
842 | assert(format == CCV_TENSOR_FORMAT_NCHW || format == CCV_TENSOR_FORMAT_NHWC)((void) sizeof ((format == CCV_TENSOR_FORMAT_NCHW || format == CCV_TENSOR_FORMAT_NHWC) ? 1 : 0), __extension__ ({ if (format == CCV_TENSOR_FORMAT_NCHW || format == CCV_TENSOR_FORMAT_NHWC ) ; else __assert_fail ("format == CCV_TENSOR_FORMAT_NCHW || format == CCV_TENSOR_FORMAT_NHWC" , "ccv_cnnp_dataframe_addons.c", 842, __extension__ __PRETTY_FUNCTION__ ); })); | |||
843 | assert(column_idx_size >= 1)((void) sizeof ((column_idx_size >= 1) ? 1 : 0), __extension__ ({ if (column_idx_size >= 1) ; else __assert_fail ("column_idx_size >= 1" , "ccv_cnnp_dataframe_addons.c", 843, __extension__ __PRETTY_FUNCTION__ ); })); | |||
844 | assert(batch_count > 0)((void) sizeof ((batch_count > 0) ? 1 : 0), __extension__ ( { if (batch_count > 0) ; else __assert_fail ("batch_count > 0" , "ccv_cnnp_dataframe_addons.c", 844, __extension__ __PRETTY_FUNCTION__ ); })); | |||
845 | assert(group_count > 0)((void) sizeof ((group_count > 0) ? 1 : 0), __extension__ ( { if (group_count > 0) ; else __assert_fail ("group_count > 0" , "ccv_cnnp_dataframe_addons.c", 845, __extension__ __PRETTY_FUNCTION__ ); })); | |||
846 | const int derived = ccv_cnnp_dataframe_make_tuple(dataframe, column_idxs, column_idx_size, 0); | |||
847 | ccv_cnnp_batch_context_t* const batch = (ccv_cnnp_batch_context_t*)ccmallocmalloc(sizeof(ccv_cnnp_batch_context_t)); | |||
848 | batch->tuple.size = column_idx_size * group_count; | |||
849 | batch->format = format; | |||
850 | batch->batch_count = batch_count; | |||
851 | batch->group_count = group_count; | |||
852 | return ccv_cnnp_dataframe_sample_new(dataframe, _ccv_cnnp_combine_new, _ccv_cnnp_combine_deinit, derived, batch_count * group_count, batch, (ccv_cnnp_column_data_context_deinit_f)ccfreefree); | |||
853 | } |