/home/liu/actions-runner/_work/ccv/ccv/lib/nnc/ccv_cnnp_dataframe_addons.c
Line | Count | Source |
1 | | #include "ccv_nnc.h" |
2 | | #include "ccv_nnc_easy.h" |
3 | | #include "ccv_nnc_internal.h" |
4 | | #include "ccv_internal.h" |
5 | | #include "_ccv_cnnp_dataframe.h" |
6 | | #include "3rdparty/sfmt/SFMT.h" |
7 | | |
8 | | // MARK - Create Dataframe from Array |
9 | | |
10 | | static void _ccv_cnnp_array_enum(const int column_idx, const int* const row_idxs, const int row_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
11 | 384 | { |
12 | 384 | int i; |
13 | 384 | ccv_array_t* const array = (ccv_array_t*)context; |
14 | 181k | for (i = 0; i < row_size; i++)
15 | 180k | data[i] = ccv_array_get(array, row_idxs[i]); |
16 | 384 | } |
17 | | |
18 | | ccv_cnnp_dataframe_t* ccv_cnnp_dataframe_from_array_new(ccv_array_t* const array) |
19 | 20 | { |
20 | 20 | const ccv_cnnp_column_data_t array_column_data = { |
21 | 20 | .data_enum = _ccv_cnnp_array_enum, |
22 | 20 | .context = array |
23 | 20 | }; |
24 | 20 | return ccv_cnnp_dataframe_new(&array_column_data, 1, array->rnum); |
25 | 20 | } |
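A minimal usage sketch for the array-backed dataframe above. Only ccv_cnnp_dataframe_from_array_new is taken from this file; the ccv_array_* helpers, the iterator API (ccv_cnnp_dataframe_iter_new / _next / _free), <stdio.h>, and the int element type are assumptions about the surrounding library.

    // Hypothetical sketch: build a dataframe over an array of ints and walk column 0.
    ccv_array_t* const array = ccv_array_new(sizeof(int), 8, 0);
    int i;
    for (i = 0; i < 8; i++)
        ccv_array_push(array, &i);
    ccv_cnnp_dataframe_t* const dataframe = ccv_cnnp_dataframe_from_array_new(array);
    ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(dataframe, COLUMN_ID_LIST(0));
    void* data = 0;
    for (i = 0; i < array->rnum; i++)
    {
        ccv_cnnp_dataframe_iter_next(iter, &data, 1, 0);
        printf("%d\n", *(int*)data); // Each row is a pointer into the backing ccv_array_t.
    }
    ccv_cnnp_dataframe_iter_free(iter);
    ccv_cnnp_dataframe_free(dataframe);
    ccv_array_free(array);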
26 | | |
27 | | typedef struct { |
28 | | ccv_cnnp_dataframe_tuple_t tuple; |
29 | | int tensor_offset; |
30 | | int device_id; |
31 | | } ccv_cnnp_copy_to_gpu_context_t; |
32 | | |
33 | | // MARK - Copy Tensors from CPU to GPU |
34 | | |
35 | | static void _ccv_cnnp_tensor_list_deinit(void* const data, void* const context) |
36 | 3.93k | { |
37 | 3.93k | ccv_cnnp_dataframe_tuple_t* const tuple = (ccv_cnnp_dataframe_tuple_t*)context; |
38 | 3.93k | ccv_nnc_tensor_t** const tensor_list = (ccv_nnc_tensor_t**)data; |
39 | 3.93k | int i; |
40 | 7.90k | for (i = 0; i < tuple->size; i++)
41 | 3.97k | if (tensor_list[i]) |
42 | 3.97k | ccv_nnc_tensor_free(tensor_list[i]); |
43 | 3.93k | ccfree(tensor_list); |
44 | 3.93k | } |
45 | | |
46 | | static void _ccv_cnnp_copy_to_gpu(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
47 | 760 | { |
48 | 760 | const ccv_cnnp_copy_to_gpu_context_t* const copy_to_gpu_context = (ccv_cnnp_copy_to_gpu_context_t*)context; |
49 | 760 | int i, j; |
50 | 1.52k | for (i = 0; i < batch_size; i++)
51 | 760 | { |
52 | 760 | ccv_nnc_tensor_t* const* const inputs = (ccv_nnc_tensor_t* const*)column_data[0][i] + copy_to_gpu_context->tensor_offset; |
53 | 760 | ccv_nnc_tensor_t** outputs = (ccv_nnc_tensor_t**)data[i]; |
54 | 760 | if (!outputs) |
55 | 73 | outputs = (ccv_nnc_tensor_t**)(data[i] = cccalloc(copy_to_gpu_context->tuple.size, sizeof(ccv_nnc_tensor_t*))); |
56 | 2.11k | for (j = 0; j < copy_to_gpu_context->tuple.size; j++)
57 | 1.35k | { |
58 | 1.35k | ccv_nnc_tensor_param_t params = inputs[j]->info; |
59 | 1.35k | params.type &= ~CCV_TENSOR_CPU_MEMORY; |
60 | 1.35k | params.type |= CCV_TENSOR_GPU_MEMORY; // Change to GPU memory. |
61 | 1.35k | CCV_TENSOR_SET_DEVICE_ID(params.type, copy_to_gpu_context->device_id); |
62 | 1.35k | outputs[j] = outputs[j] ? ccv_nnc_tensor_resize(outputs[j], params) : ccv_nnc_tensor_new(0, params, 0);
63 | 1.35k | ccv_nnc_tensor_pin_memory(inputs[j]); |
64 | 1.35k | } |
65 | 760 | ccv_nnc_cmd_exec(CMD_DATA_TRANSFER_FORWARD(), ccv_nnc_no_hint, 0, inputs, copy_to_gpu_context->tuple.size, outputs, copy_to_gpu_context->tuple.size, stream_context); |
66 | 760 | } |
67 | 760 | } |
68 | | |
69 | | int ccv_cnnp_dataframe_copy_to_gpu(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const int tensor_offset, const int tensor_size, const int device_id, const char* name) |
70 | 64 | { |
71 | 64 | assert(tensor_size > 0); |
72 | 64 | int stream_type = CCV_STREAM_CONTEXT_GPU; |
73 | 64 | CCV_STREAM_SET_DEVICE_ID(stream_type, device_id); |
74 | 64 | ccv_cnnp_copy_to_gpu_context_t* const copy_to_gpu_context = (ccv_cnnp_copy_to_gpu_context_t*)ccmalloc(sizeof(ccv_cnnp_copy_to_gpu_context_t)); |
75 | 64 | copy_to_gpu_context->tuple.size = tensor_size; |
76 | 64 | copy_to_gpu_context->tensor_offset = tensor_offset; |
77 | 64 | copy_to_gpu_context->device_id = device_id; |
78 | 64 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_copy_to_gpu, stream_type, _ccv_cnnp_tensor_list_deinit, COLUMN_ID_LIST(column_idx), copy_to_gpu_context, (ccv_cnnp_column_data_context_deinit_f)ccfree, name); |
79 | 64 | } |
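A hedged sketch of wiring up the mapping above. The column name tuple_col is hypothetical; the sketch assumes its rows are tuples of CPU tensors (e.g. as produced by ccv_cnnp_dataframe_make_tuple or the batching at the end of this file). Only the ccv_cnnp_dataframe_copy_to_gpu call itself comes from this file.

    // Hypothetical: tuple_col rows hold at least 2 CPU tensors starting at offset 0.
    // The derived column's rows are tuples of 2 tensors that now live on GPU device 0,
    // transferred on a GPU stream context as set up above.
    const int gpu_col = ccv_cnnp_dataframe_copy_to_gpu(dataframe, tuple_col, 0 /* tensor_offset */, 2 /* tensor_size */, 0 /* device_id */, "to gpu");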
80 | | |
81 | | // MARK - Use Command to Generate Output Tuple |
82 | | |
83 | | typedef struct { |
84 | | ccv_cnnp_dataframe_tuple_t tuple; |
85 | | int input_offset; |
86 | | int input_size; |
87 | | ccv_nnc_cmd_t cmd; |
88 | | ccv_nnc_hint_t hint; |
89 | | int flags; |
90 | | ccv_nnc_tensor_param_t output_params[1]; |
91 | | } ccv_cnnp_cmd_exec_context_t; |
92 | | |
93 | | static void _ccv_cnnp_dataframe_cmd_exec(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
94 | 296 | { |
95 | 296 | const ccv_cnnp_cmd_exec_context_t* const cmd_exec_context = (ccv_cnnp_cmd_exec_context_t*)context; |
96 | 296 | int i, j; |
97 | 120k | for (i = 0; i < batch_size; i++)
98 | 120k | { |
99 | 120k | ccv_nnc_tensor_t* const* const inputs = (ccv_nnc_tensor_t* const*)column_data[0][i] + cmd_exec_context->input_offset; |
100 | 120k | ccv_nnc_tensor_t** outputs = (ccv_nnc_tensor_t**)data[i]; |
101 | 120k | if (!outputs) |
102 | 3.84k | { |
103 | 3.84k | outputs = (ccv_nnc_tensor_t**)(data[i] = ccmalloc(sizeof(ccv_nnc_tensor_t*) * cmd_exec_context->tuple.size)); |
104 | 7.68k | for (j = 0; j < cmd_exec_context->tuple.size; j++)
105 | 3.84k | outputs[j] = ccv_nnc_tensor_new(0, cmd_exec_context->output_params[j], 0); |
106 | 3.84k | } |
107 | 120k | ccv_nnc_cmd_exec(cmd_exec_context->cmd, cmd_exec_context->hint, cmd_exec_context->flags, inputs, cmd_exec_context->input_size, outputs, cmd_exec_context->tuple.size, stream_context); |
108 | 120k | } |
109 | 296 | } |
110 | | |
111 | | int ccv_cnnp_dataframe_cmd_exec(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, const int input_offset, const int input_size, const ccv_nnc_tensor_param_t* const output_params, const int output_size, const int stream_type, const char* name) |
112 | 5 | { |
113 | 5 | assert(input_size > 0); |
114 | 5 | assert(output_size > 0); |
115 | 5 | ccv_cnnp_cmd_exec_context_t* const cmd_exec_context = (ccv_cnnp_cmd_exec_context_t*)ccmalloc(sizeof(ccv_cnnp_cmd_exec_context_t) + sizeof(ccv_nnc_tensor_param_t) * (output_size - 1)); |
116 | 5 | cmd_exec_context->tuple.size = output_size; |
117 | 5 | cmd_exec_context->input_offset = input_offset; |
118 | 5 | cmd_exec_context->input_size = input_size; |
119 | 5 | cmd_exec_context->cmd = cmd; |
120 | 5 | cmd_exec_context->hint = hint; |
121 | 5 | cmd_exec_context->flags = flags; |
122 | 5 | memcpy(cmd_exec_context->output_params, output_params, sizeof(ccv_nnc_tensor_param_t) * output_size); |
123 | 5 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_dataframe_cmd_exec, stream_type, _ccv_cnnp_tensor_list_deinit, COLUMN_ID_LIST(column_idx), cmd_exec_context, (ccv_cnnp_column_data_context_deinit_f)ccfree, name); |
124 | 0 | return 0; |
125 | 5 | } |
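A sketch of driving the mapping above with a concrete command. The command macro CMD_DATATYPE_CONVERSION_FORWARD(), the tuple_col column, and the output shape are assumptions for illustration, not taken from this file; only the ccv_cnnp_dataframe_cmd_exec signature is.

    // Hypothetical: convert the first tensor of each row's tuple from 32F to 16F on the CPU.
    const ccv_nnc_tensor_param_t f16_params = CPU_TENSOR_NHWC(16F, 128); // Assumed shape, purely illustrative.
    const int f16_col = ccv_cnnp_dataframe_cmd_exec(dataframe, tuple_col, CMD_DATATYPE_CONVERSION_FORWARD(), ccv_nnc_no_hint, 0, 0 /* input_offset */, 1 /* input_size */, &f16_params, 1 /* output_size */, 0 /* stream_type: CPU */, "to 16f");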
126 | | |
127 | | // MARK - Make Auxiliary Tensor as a new Column |
128 | | |
129 | | static void _ccv_cnnp_tensor_deinit(void* const data, void* const context) |
130 | 5.28k | { |
131 | 5.28k | ccv_nnc_tensor_free((ccv_nnc_tensor_t*)data); |
132 | 5.28k | } |
133 | | |
134 | | static void _ccv_cnnp_tensor_new(const int column_idx, const int* const row_idxs, const int row_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
135 | 708 | { |
136 | 708 | ccv_nnc_tensor_param_t params = *(ccv_nnc_tensor_param_t*)context; |
137 | 708 | int i; |
138 | 1.41k | for (i = 0; i < row_size; i++)
139 | 708 | if (!data[i]) |
140 | 27 | data[i] = ccv_nnc_tensor_new(0, params, 0); |
141 | 708 | } |
142 | | |
143 | | int ccv_cnnp_dataframe_add_aux(ccv_cnnp_dataframe_t* const dataframe, const ccv_nnc_tensor_param_t params, const char* name) |
144 | 18 | { |
145 | 18 | int stream_type = CCV_TENSOR_GET_MEMORY(params.type) == CCV_TENSOR_CPU_MEMORY ? 0 : CCV_STREAM_CONTEXT_GPU;
146 | 18 | if (stream_type == CCV_STREAM_CONTEXT_GPU) |
147 | 18 | CCV_STREAM_SET_DEVICE_ID(stream_type, CCV_TENSOR_GET_DEVICE_ID(params.type)); |
148 | 18 | ccv_nnc_tensor_param_t* const context = (ccv_nnc_tensor_param_t*)ccmalloc(sizeof(ccv_nnc_tensor_param_t)); |
149 | 18 | context[0] = params; |
150 | 18 | return ccv_cnnp_dataframe_add(dataframe, _ccv_cnnp_tensor_new, stream_type, _ccv_cnnp_tensor_deinit, context, (ccv_cnnp_column_data_context_deinit_f)ccfree, name); |
151 | 18 | } |
152 | | |
153 | | // MARK - Load Tensor from File Path |
154 | | |
155 | | static void _ccv_cnnp_image_deinit(void* const data, void* const context) |
156 | 4.60k | { |
157 | 4.60k | ccv_matrix_free(data); |
158 | 4.60k | } |
159 | | |
160 | | static void _ccv_cnnp_read_image(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
161 | 0 | { |
162 | 0 | parallel_for(i, batch_size) { |
163 | 0 | if (data[i]) |
164 | 0 | ccv_matrix_free(data[i]); |
165 | 0 | off_t structof = (off_t)context; |
166 | 0 | const char* const filename = *(char* const*)((const char*)column_data[0][i] + structof); |
167 | 0 | data[i] = 0; |
168 | 0 | ccv_read(filename, (ccv_dense_matrix_t**)&data[i], CCV_IO_ANY_FILE | CCV_IO_RGB_COLOR); |
169 | 0 | } parallel_endfor |
170 | 0 | } |
171 | | |
172 | | int ccv_cnnp_dataframe_read_image(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof, const char* name) |
173 | 0 | { |
174 | 0 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_read_image, 0, _ccv_cnnp_image_deinit, COLUMN_ID_LIST(column_idx), (void*)(uintptr_t)structof, 0, name); |
175 | 0 | } |
176 | | |
177 | | // MARK - Apply Random Jitter to Image |
178 | | |
179 | | typedef struct { |
180 | | sfmt_t sfmt; |
181 | | int datatype; |
182 | | ccv_cnnp_random_jitter_t random_jitter; |
183 | | } ccv_cnnp_random_jitter_context_t; |
184 | | |
185 | | static void _ccv_cnnp_image_lighting(ccv_dense_matrix_t* image, const float alpha_r, const float alpha_g, const float alpha_b) |
186 | 1 | { |
187 | 1 | assert(CCV_GET_DATA_TYPE(image->type) == CCV_32F); |
188 | 1 | assert(CCV_GET_CHANNEL(image->type) == CCV_C3); |
189 | | // These eigenvector values can be computed from the ImageNet dataset (see ccv_convnet for how that is done). Here I just copied
190 | | // them from mxnet: https://github.com/apache/incubator-mxnet/blob/master/src/operator/image/image_random-inl.h#L632
191 | 1 | const float pca_r = alpha_r * (55.46 * -0.5675) + alpha_g * (4.794 * 0.7192) + alpha_b * (1.148 * 0.4009); |
192 | 1 | const float pca_g = alpha_r * (55.46 * -0.5808) + alpha_g * (4.794 * -0.0045) + alpha_b * (1.148 * -0.8140); |
193 | 1 | const float pca_b = alpha_r * (55.46 * -0.5836) + alpha_g * (4.794 * -0.6948) + alpha_b * (1.148 * 0.4203); |
194 | 1 | int i; |
195 | 1 | const int size = image->rows * image->cols; |
196 | 1 | float* const ptr = image->data.f32; |
197 | 53.3k | for (i = 0; i < size; i++)
198 | 53.3k | { |
199 | 53.3k | ptr[i * 3] = ccv_clamp(ptr[i * 3] + pca_r, 0, 255); |
200 | 53.3k | ptr[i * 3 + 1] = ccv_clamp(ptr[i * 3 + 1] + pca_g, 0, 255); |
201 | 53.3k | ptr[i * 3 + 2] = ccv_clamp(ptr[i * 3 + 2] + pca_b, 0, 255); |
202 | 53.3k | } |
203 | 1 | } |
204 | | |
205 | | static float _ccv_cnnp_random_logexp(sfmt_t* const sfmt, const float jitter) |
206 | 4 | { |
207 | | // We want something on a logarithmic scale, thus 0 is no good, infinity is no good, and 1 means no change.
208 | | // jitter is some turbulence we want around 1. We want the range to be roughly [1 / (1 + jitter), 1 + jitter],
209 | | // but not uniform in linear space (50% of samples should fall below 1 and 50% above 1). The way to do this is to
210 | | // move to the logarithmic range, sample uniformly there, and then convert back (see the worked example after this function).
211 | 4 | double log_jitter_limit = log(1 + jitter); |
212 | 4 | double log_random_jitter = sfmt_genrand_real1(sfmt) * 2 * log_jitter_limit - log_jitter_limit; |
213 | 4 | return (float)exp(log_random_jitter); // Convert it back to exponential form. |
214 | 4 | } |
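A worked example of the sampling above, with jitter = 0.4: log_jitter_limit = log(1.4) ≈ 0.336, so log_random_jitter is drawn uniformly from [-0.336, 0.336], and exp() maps it back to [1/1.4, 1.4] ≈ [0.714, 1.4]. The midpoint 0 maps to exp(0) = 1, so half of the samples scale the value down and half scale it up, which is the symmetry the comment asks for.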
215 | | |
216 | | static void _ccv_cnnp_image_manip(ccv_dense_matrix_t* image, const ccv_cnnp_random_jitter_t random_jitter, sfmt_t* const sfmt) |
217 | 150k | { |
218 | 150k | assert(sfmt && CCV_GET_CHANNEL(image->type) == CCV_C3); |
219 | 150k | int idx[4] = {0, 1, 2, 3}; |
220 | 150k | sfmt_genrand_shuffle(sfmt, idx, 4, sizeof(int)); |
221 | 150k | int i; |
222 | 750k | for (i = 0; i < 4; i++)
223 | | // change the applying order |
224 | 600k | switch (idx[i]) |
225 | 600k | { |
226 | 150k | case 0: |
227 | 150k | if (random_jitter.brightness == 0) |
228 | 150k | break; |
229 | | // introduce some brightness changes to the original image |
230 | 1 | ccv_scale(image, (ccv_matrix_t**)&image, 0, _ccv_cnnp_random_logexp(sfmt, random_jitter.brightness)); |
231 | 1 | break; |
232 | 150k | case 1: |
233 | | // introduce some saturation changes to the original image |
234 | 150k | if (random_jitter.saturation == 0) |
235 | 150k | break; |
236 | 1 | ccv_saturation(image, &image, 0, _ccv_cnnp_random_logexp(sfmt, random_jitter.saturation)); |
237 | 1 | break; |
238 | 150k | case 2: |
239 | | // introduce some contrast changes to the original image |
240 | 150k | if (random_jitter.contrast == 0) |
241 | 150k | break; |
242 | 1 | ccv_contrast(image, &image, 0, _ccv_cnnp_random_logexp(sfmt, random_jitter.contrast)); |
243 | 1 | break; |
244 | 150k | case 3: |
245 | 150k | if (random_jitter.lighting == 0) |
246 | 150k | break; |
247 | 1 | _ccv_cnnp_image_lighting(image, sfmt_genrand_real1(sfmt) * random_jitter.lighting, sfmt_genrand_real1(sfmt) * random_jitter.lighting, sfmt_genrand_real1(sfmt) * random_jitter.lighting); |
248 | 1 | break; |
249 | 600k | } |
250 | 150k | } |
251 | | |
252 | | static void _ccv_cnnp_normalize(ccv_dense_matrix_t* const image, const float mean[3], const float inv_std[3]) |
253 | 150k | { |
254 | 150k | int i; |
255 | 150k | const int count = image->rows * image->cols; |
256 | 150k | float* ap = image->data.f32; |
257 | 153M | for (i = 0; i < count; i++)
258 | 153M | { |
259 | 153M | ap[i * 3] = (ap[i * 3] - mean[0]) * inv_std[0]; |
260 | 153M | ap[i * 3 + 1] = (ap[i * 3 + 1] - mean[1]) * inv_std[1]; |
261 | 153M | ap[i * 3 + 2] = (ap[i * 3 + 2] - mean[2]) * inv_std[2]; |
262 | 153M | } |
263 | 150k | } |
264 | | |
265 | | static void _ccv_cnnp_random_jitter(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
266 | 295 | { |
267 | 295 | sfmt_t* const sfmt = (sfmt_t*)ccmalloc(sizeof(sfmt_t) * batch_size); |
268 | 295 | ccv_cnnp_random_jitter_context_t* const ctx = (ccv_cnnp_random_jitter_context_t*)context; |
269 | 295 | int i; |
270 | 150k | for (i = 0; i < batch_size; i++)
271 | 150k | sfmt_init_gen_rand(&sfmt[i], sfmt_genrand_uint32(&ctx->sfmt)); |
272 | 295 | const ccv_cnnp_random_jitter_t random_jitter = ctx->random_jitter; |
273 | 295 | assert(random_jitter.resize.min > 0); |
274 | 295 | assert(random_jitter.resize.max >= random_jitter.resize.min); |
275 | 150k | parallel_for(i, batch_size) {
276 | 150k | if (data[i]) |
277 | 145k | ccv_matrix_free(data[i]); |
278 | 150k | ccv_dense_matrix_t* const input = (ccv_dense_matrix_t*)column_data[0][i]; |
279 | 150k | const int resize = ccv_clamp((int)(sfmt_genrand_real1(&sfmt[i]) * (random_jitter.resize.max - random_jitter.resize.min) + 0.5) + random_jitter.resize.min, random_jitter.resize.min, random_jitter.resize.max); |
280 | 150k | int resize_rows = ccv_max(resize, (int)(input->rows * (float)resize / input->cols + 0.5)); |
281 | 150k | int resize_cols = ccv_max(resize, (int)(input->cols * (float)resize / input->rows + 0.5)); |
282 | 150k | if (random_jitter.aspect_ratio > 0) |
283 | 1 | { |
284 | 1 | const float aspect_ratio = sqrtf(_ccv_cnnp_random_logexp(&sfmt[i], random_jitter.aspect_ratio)); |
285 | 1 | resize_rows = (int)(resize_rows * aspect_ratio + 0.5); |
286 | 1 | resize_cols = (int)(resize_cols / aspect_ratio + 0.5); |
287 | 1 | } |
288 | 150k | if (random_jitter.resize.roundup > 0) |
289 | 0 | { |
290 | 0 | const int roundup = random_jitter.resize.roundup; |
291 | 0 | const int roundup_2 = roundup / 2; |
292 | 0 | resize_rows = (resize_rows + roundup_2) / roundup * roundup; |
293 | 0 | resize_cols = (resize_cols + roundup_2) / roundup * roundup; |
294 | 0 | } |
295 | 150k | const int need_crop = (random_jitter.size.cols > 0 && random_jitter.size.rows > 0 && |
296 | 150k | ((resize_cols != random_jitter.size.cols || resize_rows != random_jitter.size.rows) ||
297 | 150k | (random_jitter.offset.x != 0 || random_jitter.offset.y != 0)));
298 | 150k | int cropped = 0, crop_x = 0, crop_y = 0; |
299 | 150k | ccv_dense_matrix_t* sliced = 0; |
300 | 150k | if (need_crop) |
301 | 150k | { |
302 | | // Compute crop x, y. |
303 | 150k | crop_x = random_jitter.center_crop ? |
304 | 0 | (resize_cols - random_jitter.size.cols + 1) / 2 : // Otherwise, randomly select x.
305 | 150k | (int)(sfmt_genrand_real1(&sfmt[i]) * (resize_cols - random_jitter.size.cols + 1)); |
306 | 150k | crop_x = ccv_clamp(crop_x, |
307 | 150k | ccv_min(0, resize_cols - random_jitter.size.cols), |
308 | 150k | ccv_max(0, resize_cols - random_jitter.size.cols)); |
309 | 150k | crop_y = random_jitter.center_crop ? |
310 | 0 | (resize_rows - random_jitter.size.rows + 1) / 2 : // Otherwise, randomly select y.
311 | 150k | (int)(sfmt_genrand_real1(&sfmt[i]) * (resize_rows - random_jitter.size.rows + 1)); |
312 | 150k | crop_y = ccv_clamp(crop_y, |
313 | 150k | ccv_min(0, resize_rows - random_jitter.size.rows), |
314 | 150k | ccv_max(0, resize_rows - random_jitter.size.rows)); |
315 | 150k | if (random_jitter.offset.x != 0) |
316 | 150k | crop_x += sfmt_genrand_real1(&sfmt[i]) * random_jitter.offset.x * 2 - random_jitter.offset.x; |
317 | 150k | if (random_jitter.offset.y != 0) |
318 | 150k | crop_y += sfmt_genrand_real1(&sfmt[i]) * random_jitter.offset.y * 2 - random_jitter.offset.y; |
319 | | // If we can fill in the whole view (not introducing any 0 padding), we can first crop and then scale down / up. |
320 | 150k | if (resize_cols >= random_jitter.size.cols && resize_rows >= random_jitter.size.rows) |
321 | 150k | { |
322 | 150k | const float scale_x = (float)input->cols / resize_cols; |
323 | 150k | const float scale_y = (float)input->rows / resize_rows; |
324 | 150k | const int slice_cols = (int)(random_jitter.size.cols * scale_x + 0.5); |
325 | 150k | const int slice_rows = (int)(random_jitter.size.rows * scale_y + 0.5); |
326 | 150k | assert(slice_cols <= input->cols); |
327 | 150k | assert(slice_rows <= input->rows); |
328 | 150k | const int x = ccv_clamp((int)(crop_x * scale_x + 0.5), 0, input->cols - slice_cols); |
329 | 150k | const int y = ccv_clamp((int)(crop_y * scale_y + 0.5), 0, input->rows - slice_rows); |
330 | 150k | ccv_slice(input, (ccv_matrix_t**)&sliced, 0, y, x, slice_rows, slice_cols); |
331 | 150k | resize_cols = random_jitter.size.cols; |
332 | 150k | resize_rows = random_jitter.size.rows; |
333 | 150k | cropped = 1; |
334 | 150k | } else |
335 | 1 | sliced = input; |
336 | 150k | } else |
337 | 0 | sliced = input; |
338 | 150k | ccv_dense_matrix_t* resized = 0; |
339 | | // Resize. |
340 | 150k | if (sliced->rows >= resize_rows && sliced->cols >= resize_cols) |
341 | 150k | { |
342 | | // If we can fill in the whole view, we can first crop and then scale down / up. |
343 | 150k | ccv_resample(sliced, &resized, CCV_32F, (double)resize_rows / (double)sliced->rows, (double)resize_cols / (double)sliced->cols, CCV_INTER_AREA); |
344 | 150k | } else if (sliced->rows != resize_rows || sliced->cols != resize_cols) {
345 | 0 | ccv_resample(sliced, &resized, CCV_32F, (double)resize_rows / (double)sliced->rows, (double)resize_cols / (double)sliced->cols, CCV_INTER_CUBIC); |
346 | 0 | } else { |
347 | 0 | ccv_shift(sliced, (ccv_matrix_t**)&resized, CCV_32F, 0, 0); // converting to 32f |
348 | 0 | } |
349 | 150k | if (sliced != input) |
350 | 150k | ccv_matrix_free(sliced); |
351 | 150k | if (random_jitter.symmetric && (sfmt_genrand_uint32(&sfmt[i]) & 1) == 0)
352 | 75.1k | ccv_flip(resized, &resized, 0, CCV_FLIP_X); |
353 | 150k | _ccv_cnnp_image_manip(resized, random_jitter, &sfmt[i]); |
354 | | // Apply normalization. Slice will introduce 0 padding, which won't be correct before normalization. |
355 | 150k | if (random_jitter.normalize.mean[0] != 0 || random_jitter.normalize.std[0] != 1 ||
356 | 150k | random_jitter.normalize.mean[1] != 0 || random_jitter.normalize.std[1] != 1 ||
357 | 150k | random_jitter.normalize.mean[2] != 0 || random_jitter.normalize.std[2] != 1)
358 | 150k | _ccv_cnnp_normalize(resized, random_jitter.normalize.mean, random_jitter.normalize.std); |
359 | | // If we haven't cropped in the previous step (likely because the resize down was too large and we need some fill-ins),
360 | | // do the crop now.
361 | 150k | ccv_dense_matrix_t* patch = 0; |
362 | 150k | if (!cropped && need_crop)
363 | 1 | { |
364 | 1 | ccv_slice(resized, (ccv_matrix_t**)&patch, CCV_32F, crop_y, crop_x, random_jitter.size.rows, random_jitter.size.cols); |
365 | 1 | ccv_matrix_free(resized); |
366 | 1 | } else |
367 | 150k | patch = resized; |
368 | 150k | assert(!ccv_any_nan(patch)); |
369 | 150k | data[i] = patch; |
370 | 150k | } parallel_endfor |
371 | 295 | ccfree(sfmt); |
372 | 295 | } |
373 | | |
374 | | int ccv_cnnp_dataframe_image_random_jitter(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const int datatype, const ccv_cnnp_random_jitter_t random_jitter, const char* name) |
375 | 4 | { |
376 | 4 | assert(datatype == CCV_32F); |
377 | 4 | ccv_cnnp_random_jitter_context_t* const random_jitter_context = (ccv_cnnp_random_jitter_context_t*)ccmalloc(sizeof(ccv_cnnp_random_jitter_context_t)); |
378 | 4 | if (random_jitter.seed) |
379 | 4 | sfmt_init_gen_rand(&random_jitter_context->sfmt, (uint32_t)random_jitter.seed); |
380 | 0 | else |
381 | 0 | sfmt_init_gen_rand(&random_jitter_context->sfmt, ccv_nnc_stream_context_genrand_uint32(0)); |
382 | 4 | random_jitter_context->datatype = datatype; |
383 | 4 | random_jitter_context->random_jitter = random_jitter; |
384 | 4 | int i; |
385 | | // The std in the random jitter should be inv_std. |
386 | 16 | for (i = 0; i < 3; i++)
387 | 12 | random_jitter_context->random_jitter.normalize.std[i] = random_jitter_context->random_jitter.normalize.std[i] ? 1. / random_jitter_context->random_jitter.normalize.std[i] : 1;
388 | 4 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_random_jitter, 0, _ccv_cnnp_image_deinit, COLUMN_ID_LIST(column_idx), random_jitter_context, (ccv_cnnp_column_data_context_deinit_f)ccfree, name); |
389 | 4 | } |
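A hedged configuration sketch for the column mapping above. The numbers mirror common ImageNet-style augmentation settings and are illustrative only; the struct fields themselves all appear in _ccv_cnnp_random_jitter above, while dataframe and image_col are hypothetical names.

    // Hypothetical jitter setup: resize the short side into [256, 480], crop to 224x224,
    // flip half the time, apply mild photometric jitter, and normalize per channel.
    const ccv_cnnp_random_jitter_t random_jitter = {
        .resize = { .min = 256, .max = 480, },
        .size = { .rows = 224, .cols = 224, },
        .symmetric = 1,
        .brightness = 0.4,
        .contrast = 0.4,
        .saturation = 0.4,
        .lighting = 0.1,
        .aspect_ratio = 0.333,
        .normalize = {
            .mean = { 123.68, 116.779, 103.939, },
            .std = { 58.393, 57.12, 57.375, },
        },
        .seed = 1,
    };
    const int jittered_col = ccv_cnnp_dataframe_image_random_jitter(dataframe, image_col, CCV_32F, random_jitter, "random jitter");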
390 | | |
391 | | typedef struct { |
392 | | int range; |
393 | | int datatype; |
394 | | int format; |
395 | | float onval; |
396 | | float offval; |
397 | | off_t structof; |
398 | | } ccv_cnnp_one_hot_context_t; |
399 | | |
400 | | static void _ccv_cnnp_one_hot(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
401 | 305 | { |
402 | 305 | ccv_cnnp_one_hot_context_t* const one_hot = (ccv_cnnp_one_hot_context_t*)context; |
403 | 305 | ccv_nnc_tensor_param_t params = { |
404 | 305 | .datatype = one_hot->datatype, |
405 | 305 | .type = CCV_TENSOR_CPU_MEMORY, |
406 | 305 | .format = one_hot->format, |
407 | 305 | .dim = { |
408 | 305 | one_hot->range, |
409 | 305 | }, |
410 | 305 | }; |
411 | 150k | parallel_for(i, batch_size) {
412 | 150k | int j; |
413 | 150k | const int label = *(const int*)((const char*)column_data[0][i] + one_hot->structof); |
414 | 150k | if (!data[i]) |
415 | 4.87k | data[i] = ccv_nnc_tensor_new(0, params, 0); |
416 | 150k | ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)data[i]; |
417 | 150k | assert(label >= 0 && label < one_hot->range); |
418 | 150k | if (tensor->info.datatype == CCV_32F) |
419 | 550k | for (j = 0; j < one_hot->range; j++)
420 | 500k | tensor->data.f32[j] = (j == label) ? one_hot->onval : one_hot->offval;
421 | 100k | else if (tensor->info.datatype == CCV_16F) |
422 | 1.10M | for (j = 0; j < one_hot->range; j++)
423 | 1.00M | ccv_float_to_half_precision((j == label) ? &one_hot->onval : &one_hot->offval, (uint16_t*)(tensor->data.f16 + j), 1);
424 | 0 | else |
425 | 0 | { assert(0); } |
426 | 150k | } parallel_endfor |
427 | 305 | } |
428 | | |
429 | | int ccv_cnnp_dataframe_one_hot(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof, const int range, const float onval, const float offval, const int datatype, const int format, const char* name) |
430 | 7 | { |
431 | 7 | assert(datatype == CCV_32F || datatype == CCV_16F); |
432 | 7 | ccv_cnnp_one_hot_context_t* const one_hot = (ccv_cnnp_one_hot_context_t*)ccmalloc(sizeof(ccv_cnnp_one_hot_context_t)); |
433 | 7 | one_hot->range = range; |
434 | 7 | one_hot->datatype = datatype; |
435 | 7 | one_hot->format = format; |
436 | 7 | one_hot->onval = onval; |
437 | 7 | one_hot->offval = offval; |
438 | 7 | one_hot->structof = structof; |
439 | 7 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_one_hot, 0, _ccv_cnnp_tensor_deinit, COLUMN_ID_LIST(column_idx), one_hot, (ccv_cnnp_column_data_context_deinit_f)ccfree, name); |
440 | 7 | } |
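A sketch of the one-hot mapping above, assuming each row of the source column is a struct carrying an int class label; the struct, its fields, and the column names are hypothetical, and offsetof comes from <stddef.h>.

    // Hypothetical row type stored in the source column.
    typedef struct {
        const char* filename;
        int label; // 0..999
    } my_sample_t;
    // Produce a CPU 32F tensor of length 1000 per row: onval (1) at the label index, offval (0) elsewhere.
    const int one_hot_col = ccv_cnnp_dataframe_one_hot(dataframe, sample_col, offsetof(my_sample_t, label), 1000, 1, 0, CCV_32F, CCV_TENSOR_FORMAT_NHWC, "one hot");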
441 | | |
442 | | typedef struct { |
443 | | int from_dt; |
444 | | int to_dt; |
445 | | int format; |
446 | | off_t structof; |
447 | | } ccv_cnnp_copy_scalar_context_t; |
448 | | |
449 | | static void _ccv_cnnp_copy_scalar(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
450 | 14 | { |
451 | 14 | ccv_cnnp_copy_scalar_context_t* const copy_scalar = (ccv_cnnp_copy_scalar_context_t*)context; |
452 | 14 | ccv_nnc_tensor_param_t params = { |
453 | 14 | .datatype = copy_scalar->to_dt, |
454 | 14 | .type = CCV_TENSOR_CPU_MEMORY, |
455 | 14 | .format = copy_scalar->format, |
456 | 14 | .dim = {1}, |
457 | 14 | }; |
458 | 520 | parallel_for(i, batch_size) {
459 | 520 | const ccv_numeric_data_t value = { |
460 | 520 | .u8 = (unsigned char *)((const char*)column_data[0][i] + copy_scalar->structof), |
461 | 520 | }; |
462 | 520 | if (!data[i]) |
463 | 387 | data[i] = ccv_nnc_tensor_new(0, params, 0); |
464 | 520 | ccv_nnc_tensor_t* const tensor = (ccv_nnc_tensor_t*)data[i]; |
465 | 520 | if (copy_scalar->from_dt == CCV_32S) |
466 | 520 | { |
467 | 520 | if (tensor->info.datatype == CCV_32F) |
468 | 520 | tensor->data.f32[0] = value.i32[0]; |
469 | 0 | else if (tensor->info.datatype == CCV_16F) { |
470 | 0 | float fval = value.i32[0]; |
471 | 0 | ccv_float_to_half_precision(&fval, (uint16_t*)tensor->data.f16, 1); |
472 | 0 | } |
473 | 520 | } else if (copy_scalar->from_dt == CCV_32F) {
474 | 0 | if (tensor->info.datatype == CCV_32F) |
475 | 0 | tensor->data.f32[0] = value.f32[0]; |
476 | 0 | else if (tensor->info.datatype == CCV_16F) |
477 | 0 | ccv_float_to_half_precision(value.f32, (uint16_t*)tensor->data.f16, 1); |
478 | 0 | } else if (copy_scalar->from_dt == CCV_16F) { |
479 | 0 | if (tensor->info.datatype == CCV_32F) |
480 | 0 | ccv_half_precision_to_float((uint16_t*)value.f16, tensor->data.f32, 1); |
481 | 0 | else if (tensor->info.datatype == CCV_16F) |
482 | 0 | tensor->data.f16[0] = value.f16[0]; |
483 | 0 | } |
484 | 520 | } parallel_endfor |
485 | 14 | } |
486 | | |
487 | | CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_copy_scalar(ccv_cnnp_dataframe_t* const dataframe, const int column_idx, const off_t structof, const int from_dt, const int to_dt, const int format, const char* name) |
488 | 5 | { |
489 | 5 | assert(from_dt == CCV_32S || from_dt == CCV_32F || from_dt == CCV_16F); |
490 | 5 | assert(to_dt == CCV_32F || to_dt == CCV_16F); |
491 | 5 | ccv_cnnp_copy_scalar_context_t* const copy_scalar = (ccv_cnnp_copy_scalar_context_t*)ccmalloc(sizeof(ccv_cnnp_copy_scalar_context_t)); |
492 | 5 | copy_scalar->from_dt = from_dt; |
493 | 5 | copy_scalar->to_dt = to_dt; |
494 | 5 | copy_scalar->format = format; |
495 | 5 | copy_scalar->structof = structof; |
496 | 5 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_copy_scalar, 0, _ccv_cnnp_tensor_deinit, COLUMN_ID_LIST(column_idx), copy_scalar, (ccv_cnnp_column_data_context_deinit_f)ccfree, name); |
497 | 5 | } |
498 | | |
499 | | // MARK - Matrix of Ones |
500 | | |
501 | | typedef struct { |
502 | | ccv_cnnp_dataframe_tuple_t tuple; |
503 | | int variable_size; |
504 | | int max_length; |
505 | | } ccv_cnnp_one_squared_context_t; |
506 | | |
507 | | static void _ccv_cnnp_one_squared(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
508 | 12 | { |
509 | 12 | ccv_cnnp_one_squared_context_t* const ones = (ccv_cnnp_one_squared_context_t*)context; |
510 | 12 | assert(ones->tuple.size == column_size); |
511 | 12 | const int max_length = ones->max_length; |
512 | 12 | if (ones->variable_size) |
513 | 3 | { |
514 | 3 | parallel_for(i, batch_size) { |
515 | 3 | ccv_nnc_tensor_t* const first_seq = (ccv_nnc_tensor_t*)column_data[0][i]; |
516 | 3 | assert(first_seq->info.datatype == CCV_32S); |
517 | 3 | const int first_len = ccv_nnc_tensor_count(first_seq->info); |
518 | 3 | ccv_nnc_tensor_t** outputs = data[i]; |
519 | 3 | if (!outputs) |
520 | 3 | outputs = (ccv_nnc_tensor_t**)(data[i] = cccalloc(column_size, sizeof(ccv_nnc_tensor_t*))); |
521 | 3 | int k; |
522 | 12 | for (k = 0; k < column_size; k++)
523 | 9 | if (!outputs[k]) |
524 | 9 | outputs[k] = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, first_len, max_length, max_length), 0); |
525 | 3 | int max_len = 0; |
526 | 12 | for (k = 0; k < column_size; k++)
527 | 9 | { |
528 | 9 | ccv_nnc_tensor_t* const seq = (ccv_nnc_tensor_t*)column_data[k][i]; |
529 | 9 | assert(seq->info.datatype == CCV_32S); |
530 | 9 | const int len = ccv_nnc_tensor_count(seq->info); |
531 | 9 | assert(len == first_len); |
532 | 9 | const int* const ia = seq->data.i32; |
533 | 9 | int l; |
534 | 523 | for (l = 0; l < len; l++)
535 | 514 | max_len = ccv_max(max_len, ia[l]); |
536 | 9 | } |
537 | 3 | assert(max_len <= max_length); |
538 | 9 | parallel_for(c, column_size) {
539 | 9 | ccv_nnc_tensor_t* const seq = (ccv_nnc_tensor_t*)column_data[c][i]; |
540 | 9 | assert(seq->info.datatype == CCV_32S); |
541 | 9 | const int len = ccv_nnc_tensor_count(seq->info); |
542 | 9 | assert(len == first_len); |
543 | 9 | ccv_nnc_tensor_t* tensor = outputs[c]; |
544 | 9 | tensor = ccv_nnc_tensor_resize(tensor, CPU_TENSOR_NHWC(32S, len, max_len, max_len)); |
545 | 9 | assert(outputs[c] == tensor); // Since we allocated with max_length, this cannot be reallocated. |
546 | 9 | const int* const ia = seq->data.i32; |
547 | 514 | parallel_for(j, len) {
548 | 514 | int x, y; |
549 | 514 | int seq_len = ia[j]; |
550 | 514 | int* ib = tensor->data.i32 + j * max_len * max_len; |
551 | 118k | for (y = 0; y < seq_len; y++)
552 | 118k | { |
553 | 37.0M | for (x = 0; x < seq_len; x++)
554 | 36.9M | ib[x] = 1; |
555 | 23.6M | for (x = seq_len; x < max_len; x++)
556 | 23.5M | ib[x] = 0; |
557 | 118k | ib += max_len; |
558 | 118k | } |
559 | 514 | if (seq_len < max_len) |
560 | 473 | memset(ib, 0, sizeof(int) * max_len * (max_len - seq_len)); |
561 | 514 | } parallel_endfor |
562 | 9 | } parallel_endfor |
563 | 3 | } parallel_endfor |
564 | 9 | } else { |
565 | 9 | parallel_for(i, batch_size) { |
566 | 9 | ccv_nnc_tensor_t** outputs = data[i]; |
567 | 9 | ccv_nnc_tensor_t* const first_seq = (ccv_nnc_tensor_t*)column_data[0][i]; |
568 | 9 | assert(first_seq->info.datatype == CCV_32S); |
569 | 9 | const int first_len = ccv_nnc_tensor_count(first_seq->info); |
570 | 9 | if (!outputs) |
571 | 9 | outputs = (ccv_nnc_tensor_t**)(data[i] = cccalloc(column_size, sizeof(ccv_nnc_tensor_t*))); |
572 | 9 | int k; |
573 | 18 | for (k = 0; k < column_size; k++)
574 | 9 | if (!outputs[k]) |
575 | 9 | outputs[k] = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32S, first_len, max_length, max_length), 0); |
576 | 9 | parallel_for(c, column_size) { |
577 | 9 | ccv_nnc_tensor_t* const tensor = outputs[c]; |
578 | 9 | ccv_nnc_tensor_t* const seq = (ccv_nnc_tensor_t*)column_data[c][i]; |
579 | 9 | assert(seq->info.datatype == CCV_32S); |
580 | 9 | const int len = ccv_nnc_tensor_count(seq->info); |
581 | 9 | assert(len == first_len); |
582 | 9 | const int* const ia = seq->data.i32; |
583 | 514 | parallel_for(j, len) {
584 | 514 | int x, y; |
585 | 514 | int seq_len = ia[j]; |
586 | 514 | int* ib = tensor->data.i32 + j * max_length * max_length; |
587 | 118k | for (y = 0; y < seq_len; y++)
588 | 118k | { |
589 | 37.0M | for (x = 0; x < seq_len; x++)
590 | 36.9M | ib[x] = 1; |
591 | 23.6M | for (x = seq_len; x < max_length; x++)
592 | 23.5M | ib[x] = 0; |
593 | 118k | ib += max_length; |
594 | 118k | } |
595 | 514 | if (seq_len < max_length) |
596 | 474 | memset(ib, 0, sizeof(int) * max_length * (max_length - seq_len)); |
597 | 514 | } parallel_endfor |
598 | 9 | } parallel_endfor |
599 | 9 | } parallel_endfor |
600 | 9 | } |
601 | 12 | } |
602 | | |
603 | | CCV_WARN_UNUSED(int) ccv_cnnp_dataframe_one_squared(ccv_cnnp_dataframe_t* const dataframe, const int* const column_idxs, const int column_idx_size, const int variable_size, const int max_length, const char* name) |
604 | 12 | { |
605 | 12 | assert(max_length > 0); |
606 | 12 | assert(variable_size == 0 || variable_size == 1); |
607 | 12 | ccv_cnnp_one_squared_context_t* const ones = (ccv_cnnp_one_squared_context_t*)ccmalloc(sizeof(ccv_cnnp_one_squared_context_t)); |
608 | 12 | ones->tuple.size = column_idx_size; |
609 | 12 | ones->variable_size = variable_size; |
610 | 12 | ones->max_length = max_length; |
611 | 12 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_one_squared, 0, _ccv_cnnp_tensor_list_deinit, column_idxs, column_idx_size, ones, (ccv_cnnp_column_data_context_deinit_f)ccfree, name); |
612 | 12 | } |
613 | | |
614 | | // MARK - Truncate Matrix |
615 | | |
616 | | static void _ccv_cnnp_truncate(void* const* const* const column_data, const int column_size, const int batch_size, void** const data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
617 | 7 | { |
618 | 7 | assert(column_size >= 2); |
619 | 7 | assert(column_size % 2 == 0); |
620 | 7 | const int tuple_size = column_size / 2; |
621 | 7 | ccv_cnnp_dataframe_tuple_t* const tuple = (ccv_cnnp_dataframe_tuple_t*)context; |
622 | 7 | assert(tuple->size == tuple_size); |
623 | 7 | parallel_for(i, batch_size) { |
624 | 7 | int k; |
625 | 7 | ccv_nnc_tensor_t* const first_seq = (ccv_nnc_tensor_t*)column_data[tuple_size][i]; |
626 | 7 | assert(first_seq->info.datatype == CCV_32S); |
627 | 7 | const int first_len = ccv_nnc_tensor_count(first_seq->info); |
628 | 7 | int max_len = 0; |
629 | 20 | for (k = 0; k < tuple_size; k++)
630 | 13 | { |
631 | 13 | ccv_nnc_tensor_t* const seq = (ccv_nnc_tensor_t*)column_data[tuple_size + k][i]; |
632 | 13 | assert(seq->info.datatype == CCV_32S); |
633 | 13 | const int len = ccv_nnc_tensor_count(seq->info); |
634 | 13 | assert(len == first_len); |
635 | 13 | const int* const ia = seq->data.i32; |
636 | 13 | int l; |
637 | 783 | for (l = 0; l < len; l++)
638 | 770 | max_len = ccv_max(max_len, ia[l]); |
639 | 13 | } |
640 | 7 | ccv_nnc_tensor_t* const first_inp = (ccv_nnc_tensor_t*)column_data[0][i]; |
641 | 7 | ccv_nnc_tensor_param_t first_params = first_inp->info; |
642 | 7 | assert(first_params.dim[0] == first_len); |
643 | 7 | assert(max_len <= first_params.dim[1]); |
644 | 7 | first_params.dim[1] = max_len; |
645 | 7 | ccv_nnc_tensor_t** outputs = data[i]; |
646 | 7 | if (!outputs) |
647 | 5 | outputs = (ccv_nnc_tensor_t**)(data[i] = cccalloc(tuple_size, sizeof(ccv_nnc_tensor_t*))); |
648 | 20 | for (k = 0; k < tuple_size; k++)
649 | 13 | { |
650 | 13 | if (!outputs[k]) |
651 | 11 | outputs[k] = ccv_nnc_tensor_new(0, first_params, 0); |
652 | 2 | else |
653 | 2 | outputs[k] = ccv_nnc_tensor_resize(outputs[k], first_params); |
654 | 13 | } |
655 | 13 | parallel_for(c, tuple_size) {
656 | 13 | ccv_nnc_tensor_t* const seq = (ccv_nnc_tensor_t*)column_data[tuple_size + c][i]; |
657 | 13 | assert(seq->info.datatype == CCV_32S); |
658 | 13 | const int len = ccv_nnc_tensor_count(seq->info); |
659 | 13 | ccv_nnc_tensor_t* const inp = (ccv_nnc_tensor_t*)column_data[c][i]; |
660 | 13 | ccv_nnc_tensor_param_t params = inp->info; |
661 | 13 | assert(params.dim[0] == len); |
662 | 13 | assert(first_len == len); |
663 | 13 | assert(max_len <= params.dim[1]); |
664 | 13 | assert(params.dim[2] == 0); |
665 | 13 | const int ori_len = params.dim[1]; |
666 | 13 | ccv_nnc_tensor_t* const out = outputs[c]; |
667 | 13 | uint8_t* const ua = inp->data.u8; |
668 | 13 | uint8_t* const ub = out->data.u8; |
669 | 13 | size_t la = CCV_GET_DATA_TYPE_SIZE(params.datatype) * ori_len; |
670 | 13 | size_t lb = CCV_GET_DATA_TYPE_SIZE(params.datatype) * max_len; |
671 | 770 | parallel_for(j, len) {
672 | 770 | memcpy(ub + lb * j, ua + la * j, lb); |
673 | 770 | } parallel_endfor |
674 | 13 | } parallel_endfor |
675 | 7 | } parallel_endfor |
676 | 7 | } |
677 | | |
678 | | int ccv_cnnp_dataframe_truncate(ccv_cnnp_dataframe_t* const dataframe, const int* const vec_idxs, const int vec_idx_size, const int* const len_idxs, const int len_idx_size, const char* name) |
679 | 5 | { |
680 | 5 | const int total_idx_size = vec_idx_size + len_idx_size; |
681 | 5 | assert(total_idx_size > 0); |
682 | 5 | assert(vec_idx_size == len_idx_size); |
683 | 5 | int total_idxs[total_idx_size]; |
684 | 5 | memcpy(total_idxs, vec_idxs, sizeof(int) * vec_idx_size); |
685 | 5 | memcpy(total_idxs + vec_idx_size, len_idxs, sizeof(int) * len_idx_size); |
686 | 5 | ccv_cnnp_dataframe_tuple_t* const tuple = (ccv_cnnp_dataframe_tuple_t*)ccmalloc(sizeof(ccv_cnnp_dataframe_tuple_t)); |
687 | 5 | tuple->size = vec_idx_size; |
688 | 5 | return ccv_cnnp_dataframe_map(dataframe, _ccv_cnnp_truncate, 0, _ccv_cnnp_tensor_list_deinit, total_idxs, total_idx_size, tuple, (ccv_cnnp_column_data_context_deinit_f)ccfree, name); |
689 | 5 | } |
690 | | |
691 | | // MARK - Batching |
692 | | |
693 | | typedef struct { |
694 | | ccv_cnnp_dataframe_tuple_t tuple; |
695 | | int format; |
696 | | int batch_count; |
697 | | int group_count; |
698 | | } ccv_cnnp_batch_context_t; |
699 | | |
700 | | static void _ccv_cnnp_combine_new(void* const* const input_data, const int input_size, void** const output_data, void* const context, ccv_nnc_stream_context_t* const stream_context) |
701 | 363 | { |
702 | 363 | ccv_cnnp_batch_context_t* const batch = (ccv_cnnp_batch_context_t*)context; |
703 | 363 | const int output_tuple_size = batch->tuple.size; |
704 | 363 | const int batch_count = batch->batch_count; |
705 | 363 | const int group_count = batch->group_count; |
706 | 363 | const int input_tuple_size = output_tuple_size / group_count; |
707 | 363 | int i, j, k; |
708 | 363 | assert(input_size > 0); |
709 | 363 | if (!output_data[0]) |
710 | 16 | { |
711 | 16 | ccv_nnc_tensor_t** const inputs = (ccv_nnc_tensor_t**)input_data[0]; |
712 | 16 | ccv_nnc_tensor_t** const tensors = (ccv_nnc_tensor_t**)(output_data[0] = ccmalloc(sizeof(ccv_nnc_tensor_t*) * output_tuple_size)); |
713 | 63 | for (i = 0; i < group_count; i++)
714 | 148 | for (j = 0; j < input_tuple_size; j++)
715 | 101 | { |
716 | 101 | ccv_nnc_tensor_param_t params = inputs[j]->info; |
717 | 101 | assert(params.datatype == CCV_32F || params.datatype == CCV_32S || params.datatype == CCV_16F); // Only 32F, 32S and 16F are supported for now.
718 | 101 | assert(params.format == CCV_TENSOR_FORMAT_NHWC || params.format == CCV_TENSOR_FORMAT_NCHW); |
719 | 101 | params.format = batch->format; |
720 | | // Special-case for dim counts of 3 and 1; in these two cases, the N (batch) dimension is not provided.
721 | 101 | if (batch->format == inputs[j]->info.format) |
722 | 38 | { |
723 | 38 | const int nd = ccv_nnc_tensor_nd(params.dim); |
724 | 38 | memset(params.dim, 0, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
725 | 38 | memcpy(params.dim + 1, inputs[j]->info.dim, sizeof(int) * nd); |
726 | 63 | } else { |
727 | 63 | const int nd = ccv_nnc_tensor_nd(params.dim); |
728 | 63 | if (nd < 3) |
729 | 36 | { |
730 | 36 | memset(params.dim, 0, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
731 | 36 | memcpy(params.dim + 1, inputs[j]->info.dim, sizeof(int) * nd); |
732 | 36 | } else if (nd >= 3) {
733 | 27 | memset(params.dim, 0, sizeof(int) * CCV_NNC_MAX_DIM_ALLOC); |
734 | 27 | const int hw = ccv_nnc_tensor_hw(inputs[j]->info, nd); |
735 | 27 | if (batch->format == CCV_TENSOR_FORMAT_NCHW) |
736 | 27 | { |
737 | 27 | params.dim[1] = ccv_nnc_tensor_get_c(inputs[j]->info); |
738 | 81 | for (k = 0; k < CCV_NNC_MAX_DIM; k++)
739 | 54 | params.dim[k + 2] = inputs[j]->info.dim[k + hw]; |
740 | 27 | } else { |
741 | 0 | params.dim[CCV_NNC_MAX_DIM + 1] = ccv_nnc_tensor_get_c(inputs[j]->info); |
742 | 0 | for (k = 0; k < CCV_NNC_MAX_DIM; k++) |
743 | 0 | params.dim[k + 1] = inputs[j]->info.dim[k + hw]; |
744 | 0 | } |
745 | 27 | } |
746 | 63 | } |
747 | 101 | params.dim[0] = batch_count; // Set the batch count now. |
748 | 101 | tensors[i * input_tuple_size + j] = ccv_nnc_tensor_new(0, params, 0); |
749 | 101 | } |
750 | 16 | } |
751 | 1.09k | for (i = 0; i < group_count; i++)
752 | 2.08k | for (j = 0; j < input_tuple_size; j++)
753 | 1.35k | { |
754 | 1.35k | ccv_nnc_tensor_t* const output = ((ccv_nnc_tensor_t**)output_data[0])[i * input_tuple_size + j]; |
755 | 335k | parallel_for(k, batch_count) {
756 | 335k | ccv_nnc_tensor_t* const input = ((ccv_nnc_tensor_t**)input_data[(k + i * batch_count) % input_size])[j]; |
757 | 335k | const size_t tensor_count = ccv_nnc_tensor_count(input->info); |
758 | 335k | if (input->info.datatype == CCV_32F) |
759 | 111k | { |
760 | 111k | float* const ap = input->data.f32; |
761 | 111k | float* const bp = output->data.f32 + k * tensor_count; |
762 | 111k | if (input->info.format == output->info.format) |
763 | 51.4k | memcpy(bp, ap, sizeof(float) * tensor_count); |
764 | 60.4k | else { |
765 | | // Do a simple format conversion. |
766 | 60.4k | const int c = ccv_nnc_tensor_get_c(input->info); |
767 | 60.4k | assert(c > 0); |
768 | 60.4k | const size_t hw_count = tensor_count / c; |
769 | 60.4k | size_t x; |
770 | 60.4k | int y; |
771 | 60.4k | if (input->info.format == CCV_TENSOR_FORMAT_NHWC && output->info.format == CCV_TENSOR_FORMAT_NCHW) |
772 | 61.9M | for (x = 0; x < hw_count; x++)
773 | 247M | for (y = 0; y < c; y++)
774 | 185M | bp[y * hw_count + x] = ap[x * c + y]; |
775 | 0 | else if (input->info.format == CCV_TENSOR_FORMAT_NCHW && output->info.format == CCV_TENSOR_FORMAT_NHWC) |
776 | 0 | for (x = 0; x < hw_count; x++) |
777 | 0 | for (y = 0; y < c; y++) |
778 | 0 | bp[x * c + y] = ap[y * hw_count + x]; |
779 | 60.4k | } |
780 | 223k | } else if (input->info.datatype == CCV_32S) { |
781 | 2.56k | int* const ap = input->data.i32; |
782 | 2.56k | int* const bp = output->data.i32 + k * tensor_count; |
783 | 2.56k | if (input->info.format == output->info.format) |
784 | 0 | memcpy(bp, ap, sizeof(int) * tensor_count); |
785 | 2.56k | else { |
786 | | // Do a simple format conversion. |
787 | 2.56k | const int c = ccv_nnc_tensor_get_c(input->info); |
788 | 2.56k | assert(c > 0); |
789 | 2.56k | const size_t hw_count = tensor_count / c; |
790 | 2.56k | size_t x; |
791 | 2.56k | int y; |
792 | 2.56k | if (input->info.format == CCV_TENSOR_FORMAT_NHWC && output->info.format == CCV_TENSOR_FORMAT_NCHW) |
793 | 5.12k | for (x = 0; x < hw_count; x++)
794 | 659k | for (y = 0; y < c; y++)
795 | 656k | bp[y * hw_count + x] = ap[x * c + y]; |
796 | 0 | else if (input->info.format == CCV_TENSOR_FORMAT_NCHW && output->info.format == CCV_TENSOR_FORMAT_NHWC) |
797 | 0 | for (x = 0; x < hw_count; x++) |
798 | 0 | for (y = 0; y < c; y++) |
799 | 0 | bp[x * c + y] = ap[y * hw_count + x]; |
800 | 2.56k | } |
801 | 221k | } else if (input->info.datatype == CCV_16F) { |
802 | 221k | ccv_float16_t* const ap = input->data.f16; |
803 | 221k | ccv_float16_t* const bp = output->data.f16 + k * tensor_count; |
804 | 221k | if (input->info.format == output->info.format) |
805 | 100k | memcpy(bp, ap, sizeof(ccv_float16_t) * tensor_count); |
806 | 120k | else { |
807 | | // Do a simple format conversion. |
808 | 120k | const int c = ccv_nnc_tensor_get_c(input->info); |
809 | 120k | assert(c > 0); |
810 | 120k | const size_t hw_count = tensor_count / c; |
811 | 120k | size_t x; |
812 | 120k | int y; |
813 | 120k | if (input->info.format == CCV_TENSOR_FORMAT_NHWC && output->info.format == CCV_TENSOR_FORMAT_NCHW) |
814 | 123M | for (x = 0; x < hw_count; x++)
815 | 494M | for (y = 0; y < c; y++)
816 | 371M | bp[y * hw_count + x] = ap[x * c + y]; |
817 | 0 | else if (input->info.format == CCV_TENSOR_FORMAT_NCHW && output->info.format == CCV_TENSOR_FORMAT_NHWC) |
818 | 0 | for (x = 0; x < hw_count; x++) |
819 | 0 | for (y = 0; y < c; y++) |
820 | 0 | bp[x * c + y] = ap[y * hw_count + x]; |
821 | 120k | } |
822 | 221k | } else { |
823 | 0 | assert(0); |
824 | 0 | } |
825 | 335k | } parallel_endfor |
826 | 1.35k | } |
827 | 363 | } |
828 | | |
829 | | static void _ccv_cnnp_combine_deinit(void* const self, void* const context) |
830 | 16 | { |
831 | 16 | ccv_cnnp_batch_context_t* const batch = (ccv_cnnp_batch_context_t*)context; |
832 | 16 | ccv_nnc_tensor_t** const tensors = (ccv_nnc_tensor_t**)self; |
833 | 16 | const int size = batch->tuple.size; |
834 | 16 | int i; |
835 | 117 | for (i = 0; i < size; i++)
836 | 101 | ccv_nnc_tensor_free(tensors[i]); |
837 | 16 | ccfree(tensors); |
838 | 16 | } |
839 | | |
840 | | ccv_cnnp_dataframe_t* ccv_cnnp_dataframe_combine_new(ccv_cnnp_dataframe_t* const dataframe, const int* const column_idxs, const int column_idx_size, const int batch_count, const int group_count, const int format) |
841 | 13 | { |
842 | 13 | assert(format == CCV_TENSOR_FORMAT_NCHW || format == CCV_TENSOR_FORMAT_NHWC); |
843 | 13 | assert(column_idx_size >= 1); |
844 | 13 | assert(batch_count > 0); |
845 | 13 | assert(group_count > 0); |
846 | 13 | const int derived = ccv_cnnp_dataframe_make_tuple(dataframe, column_idxs, column_idx_size, 0); |
847 | 13 | ccv_cnnp_batch_context_t* const batch = (ccv_cnnp_batch_context_t*)ccmalloc(sizeof(ccv_cnnp_batch_context_t)); |
848 | 13 | batch->tuple.size = column_idx_size * group_count; |
849 | 13 | batch->format = format; |
850 | 13 | batch->batch_count = batch_count; |
851 | 13 | batch->group_count = group_count; |
852 | 13 | return ccv_cnnp_dataframe_sample_new(dataframe, _ccv_cnnp_combine_new, _ccv_cnnp_combine_deinit, derived, batch_count * group_count, batch, (ccv_cnnp_column_data_context_deinit_f)ccfree); |
853 | 13 | } |
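A hedged end-to-end sketch tying together the batching above with the GPU copy defined earlier in this file. The column indices (image_col, one_hot_col) and the iterator calls are assumptions about the surrounding dataframe API rather than anything defined here, and the GPU step requires a CUDA-enabled build.

    // Hypothetical: batch 64 rows at a time, converting images to NCHW. With group_count of 1,
    // column 0 of the batched dataframe holds a tuple of 2 tensors (batched images, batched one-hot labels).
    ccv_cnnp_dataframe_t* const batched = ccv_cnnp_dataframe_combine_new(dataframe, COLUMN_ID_LIST(image_col, one_hot_col), 64, 1, CCV_TENSOR_FORMAT_NCHW);
    // Move each batched tuple to GPU device 0 using ccv_cnnp_dataframe_copy_to_gpu from this file.
    const int gpu_col = ccv_cnnp_dataframe_copy_to_gpu(batched, 0, 0, 2, 0, "to gpu");
    ccv_cnnp_dataframe_iter_t* const iter = ccv_cnnp_dataframe_iter_new(batched, COLUMN_ID_LIST(gpu_col));
    void* data = 0;
    ccv_cnnp_dataframe_iter_next(iter, &data, 1, 0);
    ccv_nnc_tensor_t** const batch = (ccv_nnc_tensor_t**)data; // batch[0]: images, batch[1]: one-hot labels, both on GPU 0.
    ccv_cnnp_dataframe_iter_free(iter);
    ccv_cnnp_dataframe_free(batched);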