Coverage Report

Created: 2021-04-11 20:23

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/cmd/loss/ccv_nnc_categorical_crossentropy_cpu_ref.c
Line
Count
Source
1
#include "ccv.h"
2
#include "ccv_internal.h"
3
#include "nnc/ccv_nnc.h"
4
#include "nnc/ccv_nnc_easy.h"
5
#include "nnc/ccv_nnc_internal.h"
6
#ifdef USE_OPENMP
7
#include <omp.h>
8
#endif
9
#ifdef USE_DISPATCH
10
#include <dispatch/dispatch.h>
11
#endif
12
13
static int _ccv_nnc_categorical_crossentropy_forw(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
14
11
{
15
11
  assert(input_size == 2);
16
11
  const ccv_nnc_tensor_t* a = inputs[0];
17
11
  assert(!CCV_IS_TENSOR_VIEW(a));
18
11
  const ccv_nnc_tensor_t* b = inputs[1];
19
11
  assert(!CCV_IS_TENSOR_VIEW(b));
20
11
  assert(output_size == 1);
21
11
  ccv_nnc_tensor_t* c = outputs[0];
22
11
  assert(!CCV_IS_TENSOR_VIEW(c));
23
11
  const int axis_count = ccv_nnc_tensor_nd(a->info.dim);
24
11
  const int batch_size = axis_count < 2 ? 1 : a->info.dim[0];
25
11
  const int count = ccv_nnc_tensor_count(a->info) / batch_size;
26
11
  int i;
27
11
  if (b->info.datatype == CCV_32F)
28
9
  {
29
9
    // If it has more than 1 axis, then the range is the channel count. Otherwise, if our batch size is 1, then the range is
30
9
    // the channel count. Otherwise, the range is 1 (and the only axis is the batch size).
31
9
    const int range = ccv_nnc_tensor_nd(b->info.dim) > 1 ? ccv_nnc_tensor_get_c(b->info) : (batch_size == 1 ? b->info.dim[0] : 1);
32
9
    if (range == 1)
33
8
    {
34
16
      for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && b->info.dim[i] > 0; i++)
35
8
        { assert(b->info.dim[i] == c->info.dim[i]); }
36
8
      const float trim0 = cmd.info.label_smoothing.trim0;
37
8
      const float trim1 = cmd.info.label_smoothing.trim1;
38
8
      if (trim0 == 0 && trim1 == 1)
39
4
      {
40
4
        parallel_for(i, batch_size) {
41
0
          const int label = (int)(b->data.f32[i] + 0.5);
42
0
          assert(label >= 0 && label < count);
43
19
          const float p = a->data.f32[i * count + label];
44
19
          c->data.f32[i] = -logf(p);
45
23
        } parallel_endfor
46
4
      } else {
47
4
        parallel_for(i, batch_size) {
48
0
          const int label = (int)(b->data.f32[i] + 0.5);
49
0
          assert(label >= 0 && label < count);
50
25
          int j;
51
25
          float p = 0;
52
25
          float* const ap = a->data.f32 + i * count;
53
714
          for (j = 0; j < label; j++)
54
689
            p += -trim0 * logf(ap[j]);
55
25
          p += -trim1 * logf(ap[label]);
56
1.01k
          for (j = label + 1; j < count; j++)
57
992
            p += -trim0 * logf(ap[j]);
58
25
          c->data.f32[i] = p;
59
29
        } parallel_endfor
60
4
      }
61
8
    } else {
62
1
      assert(range == count);
63
1
      parallel_for(i, batch_size) {
64
0
        int j;
65
0
        float p = 0;
66
0
        float* const bp = b->data.f32 + i * count;
67
0
        float* const ap = a->data.f32 + i * count;
68
4
        for (j = 0; j < count; j++)
69
4
          p += -bp[j] * logf(ap[j]);
70
0
        c->data.f32[i] = p;
71
1
      } parallel_endfor
72
1
    }
73
9
  } else if (b->info.datatype == CCV_32S) {
74
5
    for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && b->info.dim[i] > 0; i++)
75
3
      { assert(b->info.dim[i] == c->info.dim[i]); }
76
2
    const float trim0 = cmd.info.label_smoothing.trim0;
77
2
    const float trim1 = cmd.info.label_smoothing.trim1;
78
2
    if (trim0 == 0 && trim1 == 1)
79
1
    {
80
1
      parallel_for(i, batch_size) {
81
0
        const int label = b->data.i32[i];
82
0
        assert(label >= 0 && label < count);
83
2
        const float p = a->data.f32[i * count + label];
84
2
        c->data.f32[i] = -logf(p);
85
3
      } parallel_endfor
86
1
    } else {
87
1
      parallel_for(i, batch_size) {
88
0
        const int label = b->data.i32[i];
89
0
        assert(label >= 0 && label < count);
90
1
        int j;
91
1
        float p = 0;
92
1
        float* const ap = a->data.f32 + i * count;
93
3
        for (j = 0; j < label; j++)
94
2
          p += -trim0 * logf(ap[j]);
95
1
        p += -trim1 * logf(ap[label]);
96
2
        for (j = label + 1; j < count; j++)
97
1
          p += -trim0 * logf(ap[j]);
98
1
        c->data.f32[i] = p;
99
2
      } parallel_endfor
100
1
    }
101
2
  }
102
11
  return CCV_NNC_EXEC_SUCCESS;
103
11
}
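
As a reading aid (not part of the generated report): writing $\ell_i$ for the index label decoded from b, the forward kernel above computes, per batch element $i$,

$$c_i = -\,\mathrm{trim1}\,\log a_{i,\ell_i} \;-\; \mathrm{trim0}\sum_{j \ne \ell_i} \log a_{i,j},$$

which reduces to $c_i = -\log a_{i,\ell_i}$ in the fast path where trim0 = 0 and trim1 = 1. When b carries a full per-class distribution (the range == count branch), it computes $c_i = -\sum_j b_{i,j}\,\log a_{i,j}$ instead. In every branch a is assumed to already hold probabilities (e.g. softmax output); the kernel applies logf directly without normalizing.
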
104
105
static int _ccv_nnc_categorical_crossentropy_back(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
106
8
{
107
8
  assert(input_size >= 3);
108
8
  assert(output_size >= 1);
109
8
  const ccv_nnc_tensor_t* g = inputs[0];
110
8
  assert(!g || !CCV_IS_TENSOR_VIEW(g));
111
8
  const ccv_nnc_tensor_t* a = inputs[1];
112
8
  assert(!CCV_IS_TENSOR_VIEW(a));
113
8
  const ccv_nnc_tensor_t* b = inputs[2];
114
8
  assert(!CCV_IS_TENSOR_VIEW(b));
115
8
  ccv_nnc_tensor_t* h = outputs[0];
116
8
  assert(!CCV_IS_TENSOR_VIEW(h));
117
8
  const int axis_count = ccv_nnc_tensor_nd(a->info.dim);
118
8
  const int batch_size = axis_count < 2 ? 1 : a->info.dim[0];
119
8
  const int count = ccv_nnc_tensor_count(a->info) / batch_size;
120
8
  int i;
121
8
  if (g)
122
8
  {
123
8
    if (b->info.datatype == CCV_32F)
124
5
    {
125
5
      // If it has more than 1 axis, then the range is the channel count. Otherwise, if our batch size is 1, then the range is
126
5
      // the channel count. Otherwise, the range is 1 (and the only axis is the batch size).
127
5
      const int range = ccv_nnc_tensor_nd(b->info.dim) > 1 ? ccv_nnc_tensor_get_c(b->info) : (batch_size == 1 ? b->info.dim[0] : 1);
128
5
      if (range == 1)
129
4
      {
130
12
        for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++)
131
8
          { assert(a->info.dim[i] == h->info.dim[i]); }
132
4
        const float trim0 = cmd.info.label_smoothing.trim0;
133
4
        const float trim1 = cmd.info.label_smoothing.trim1;
134
4
        if (trim0 == 0 && trim1 == 1)
135
2
        {
136
2
          parallel_for(i, batch_size) {
137
0
            int j;
138
0
            const float gp = g->data.f32[i];
139
0
            const int label = (int)(b->data.f32[i] + 0.5);
140
0
            float* const hp = h->data.f32 + i * count;
141
364
            for (j = 0; j < count; j++)
142
364
              hp[j] = 0;
143
0
            const float p = a->data.f32[i * count + label];
144
0
            hp[label] = -gp / p;
145
2
          } parallel_endfor
146
2
        } else {
147
2
          parallel_for(i, batch_size) {
148
0
            int j;
149
0
            const float gp = g->data.f32[i];
150
0
            const int label = (int)(b->data.f32[i] + 0.5);
151
0
            float* const hp = h->data.f32 + i * count;
152
0
            float* const ap = a->data.f32 + i * count;
153
282
            for (j = 0; j < label; j++)
154
282
              hp[j] = -gp * trim0 / ap[j];
155
0
            hp[label] = -gp * trim1 / ap[label];
156
372
            for (j = label + 1; j < count; j++)
157
372
              hp[j] = -gp * trim0 / ap[j];
158
2
          } parallel_endfor
159
2
        }
160
4
      } else {
161
1
        assert(range == count);
162
1
        parallel_for(i, batch_size) {
163
0
          int j;
164
0
          const float gp = g->data.f32[i];
165
0
          float* const hp = h->data.f32 + i * count;
166
0
          float* const ap = a->data.f32 + i * count;
167
0
          float* const bp = b->data.f32 + i * count;
168
6
          for (j = 0; j < count; j++)
169
6
            hp[j] = -gp * bp[j] / ap[j];
170
1
        } parallel_endfor
171
1
      }
172
5
    } else if (b->info.datatype == CCV_32S) {
173
9
      for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++)
174
6
        { assert(a->info.dim[i] == h->info.dim[i]); }
175
3
      const float trim0 = cmd.info.label_smoothing.trim0;
176
3
      const float trim1 = cmd.info.label_smoothing.trim1;
177
3
      if (trim0 == 0 && trim1 == 1)
178
2
      {
179
2
        parallel_for(i, batch_size) {
180
0
          int j;
181
0
          const float gp = g->data.f32[i];
182
0
          const int label = b->data.i32[i];
183
0
          float* const hp = h->data.f32 + i * count;
184
9
          for (j = 0; j < count; j++)
185
9
            hp[j] = 0;
186
0
          const float p = a->data.f32[i * count + label];
187
0
          hp[label] = -gp / p;
188
2
        } parallel_endfor
189
2
      } else {
190
1
        parallel_for(i, batch_size) {
191
0
          int j;
192
0
          const float gp = g->data.f32[i];
193
0
          const int label = b->data.i32[i];
194
0
          float* const hp = h->data.f32 + i * count;
195
0
          float* const ap = a->data.f32 + i * count;
196
2
          for (j = 0; j < label; j++)
197
2
            hp[j] = -gp * trim0 / ap[j];
198
0
          hp[label] = -gp * trim1 / ap[label];
199
1
          for (j = label + 1; j < count; j++)
200
1
            hp[j] = -gp * trim0 / ap[j];
201
1
        } parallel_endfor
202
1
      }
203
3
    }
204
8
  } else {
205
0
    if (b->info.datatype == CCV_32F)
206
0
    {
207
0
      // If it has more than 1 axis, then the range is the channel count. Otherwise, if our batch size is 1, then the range is
208
0
      // the channel count. Otherwise, the range is 1 (and the only axis is the batch size).
209
0
      const int range = ccv_nnc_tensor_nd(b->info.dim) > 1 ? ccv_nnc_tensor_get_c(b->info) : (batch_size == 1 ? b->info.dim[0] : 1);
210
0
      if (range == 1)
211
0
      {
212
0
        for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++)
213
0
          { assert(a->info.dim[i] == h->info.dim[i]); }
214
0
        const float trim0 = cmd.info.label_smoothing.trim0;
215
0
        const float trim1 = cmd.info.label_smoothing.trim1;
216
0
        if (trim0 == 0 && trim1 == 1)
217
0
        {
218
0
          parallel_for(i, batch_size) {
219
0
            int j;
220
0
            const int label = (int)(b->data.f32[i] + 0.5);
221
0
            float* const hp = h->data.f32 + i * count;
222
0
            for (j = 0; j < count; j++)
223
0
              hp[j] = 0;
224
0
            const float p = a->data.f32[i * count + label];
225
0
            hp[label] = -1. / p;
226
0
          } parallel_endfor
227
0
        } else {
228
0
          parallel_for(i, batch_size) {
229
0
            int j;
230
0
            const int label = (int)(b->data.f32[i] + 0.5);
231
0
            float* const hp = h->data.f32 + i * count;
232
0
            float* const ap = a->data.f32 + i * count;
233
0
            for (j = 0; j < label; j++)
234
0
              hp[j] = -trim0 / ap[j];
235
0
            hp[label] = -trim1 / ap[label];
236
0
            for (j = label + 1; j < count; j++)
237
0
              hp[j] = -trim0 / ap[j];
238
0
          } parallel_endfor
239
0
        }
240
0
      } else {
241
0
        assert(range == count);
242
0
        parallel_for(i, batch_size) {
243
0
          int j;
244
0
          float* const hp = h->data.f32 + i * count;
245
0
          float* const ap = a->data.f32 + i * count;
246
0
          float* const bp = b->data.f32 + i * count;
247
0
          for (j = 0; j < count; j++)
248
0
            hp[j] = -bp[j] / ap[j];
249
0
        } parallel_endfor
250
0
      }
251
0
    } else if (b->info.datatype == CCV_32S) {
252
0
      const float trim0 = cmd.info.label_smoothing.trim0;
253
0
      const float trim1 = cmd.info.label_smoothing.trim1;
254
0
      if (trim0 == 0 && trim1 == 1)
255
0
      {
256
0
        for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++)
257
0
          { assert(a->info.dim[i] == h->info.dim[i]); }
258
0
        parallel_for(i, batch_size) {
259
0
          int j;
260
0
          const int label = b->data.i32[i];
261
0
          float* const hp = h->data.f32 + i * count;
262
0
          for (j = 0; j < count; j++)
263
0
            hp[j] = 0;
264
0
          const float p = a->data.f32[i * count + label];
265
0
          hp[label] = -1. / p;
266
0
        } parallel_endfor
267
0
      } else {
268
0
        for (i = 0; i < CCV_NNC_MAX_DIM_ALLOC && a->info.dim[i] > 0; i++)
269
0
          { assert(a->info.dim[i] == h->info.dim[i]); }
270
0
        parallel_for(i, batch_size) {
271
0
          int j;
272
0
          const int label = b->data.i32[i];
273
0
          float* const hp = h->data.f32 + i * count;
274
0
          float* const ap = a->data.f32 + i * count;
275
0
          for (j = 0; j < label; j++)
276
0
            hp[j] = -trim0 / ap[j];
277
0
          hp[label] = -trim1 / ap[label];
278
0
          for (j = label + 1; j < count; j++)
279
0
            hp[j] = -trim0 / ap[j];
280
0
        } parallel_endfor
281
0
      }
282
0
    }
283
0
  }
284
8
  return CCV_NNC_EXEC_SUCCESS;
285
8
}
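
Correspondingly, with incoming gradient $g_i$ (taken as 1 in the else branch above, where no gradient tensor is supplied), the backward kernel writes

$$h_{i,j} = -\,g_i\,\frac{\mathrm{trim0}}{a_{i,j}} \;\;(j \ne \ell_i), \qquad h_{i,\ell_i} = -\,g_i\,\frac{\mathrm{trim1}}{a_{i,\ell_i}},$$

and $h_{i,j} = -\,g_i\,b_{i,j}/a_{i,j}$ for a full label distribution, i.e. the partial derivative of the forward loss with respect to each probability $a_{i,j}$, scaled by $g_i$.
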
286
287
REGISTER_COMMAND_BACKEND(CCV_NNC_CATEGORICAL_CROSSENTROPY_FORWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
288
1
{
289
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
290
1
  registry->tensor_datatypes = CCV_32F | CCV_32S;
291
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
292
1
  registry->algorithms = 1;
293
1
  registry->exec = _ccv_nnc_categorical_crossentropy_forw;
294
1
}
295
296
REGISTER_COMMAND_BACKEND(CCV_NNC_CATEGORICAL_CROSSENTROPY_BACKWARD, CCV_NNC_BACKEND_CPU_REF)(ccv_nnc_cmd_backend_registry_t* const registry)
297
1
{
298
1
  registry->tensor_formats = CCV_TENSOR_FORMAT_NHWC | CCV_TENSOR_FORMAT_NCHW;
299
1
  registry->tensor_datatypes = CCV_32F | CCV_32S;
300
1
  registry->tensor_memory = CCV_TENSOR_CPU_MEMORY;
301
1
  registry->algorithms = 1;
302
1
  registry->exec = _ccv_nnc_categorical_crossentropy_back;
303
1
}
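
For context, here is a minimal sketch of how these registered backends might be exercised through the public ccv_nnc API. It is a hypothetical driver, not taken from this report: the CMD_CATEGORICAL_CROSSENTROPY_FORWARD(), CPU_TENSOR_NHWC() and TENSOR_LIST() helpers are assumed from ccv_nnc_easy.h and the generated command headers, so verify the exact spellings against the tree before relying on it.

/* Hypothetical driver, not part of the coverage listing. Macro names are
 * assumptions; check ccv_nnc_easy.h and the generated command headers. */
#include <stdio.h>
#include <string.h>
#include <nnc/ccv_nnc.h>
#include <nnc/ccv_nnc_easy.h>

int main(void)
{
	ccv_nnc_init();
	/* 2 samples, 3 classes: a holds probabilities, b holds float class indices,
	 * c receives one loss value per sample. */
	ccv_nnc_tensor_t* const a = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2, 3), 0);
	ccv_nnc_tensor_t* const b = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
	ccv_nnc_tensor_t* const c = ccv_nnc_tensor_new(0, CPU_TENSOR_NHWC(32F, 2), 0);
	const float probs[6] = { 0.7, 0.2, 0.1, 0.1, 0.3, 0.6 };
	memcpy(a->data.f32, probs, sizeof(probs));
	b->data.f32[0] = 0; /* sample 0 is class 0 */
	b->data.f32[1] = 2; /* sample 1 is class 2 */
	/* Should dispatch to _ccv_nnc_categorical_crossentropy_forw via the CPU_REF backend. */
	ccv_nnc_cmd_exec(CMD_CATEGORICAL_CROSSENTROPY_FORWARD(), ccv_nnc_no_hint, 0, TENSOR_LIST(a, b), TENSOR_LIST(c), 0);
	printf("loss: %f %f\n", c->data.f32[0], c->data.f32[1]); /* expect -log(0.7) and -log(0.6) */
	ccv_nnc_tensor_free(a);
	ccv_nnc_tensor_free(b);
	ccv_nnc_tensor_free(c);
	return 0;
}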