Coverage Report

Created: 2021-09-30 20:21

/home/liu/buildslave/linux-x64-runtests/build/lib/nnc/ccv_nnc_dynamic_graph_evaluate.c
 Line|  Count|Source (first uncovered: line 56)
    1|       |#include "ccv_nnc.h"
    2|       |#include "ccv_nnc_easy.h"
    3|       |#include "ccv_nnc_internal.h"
    4|       |#include "ccv_nnc_easy.h"
    5|       |#include "ccv_internal.h"
    6|       |#include "_ccv_nnc_dynamic_graph.h"
    7|       |#include "_ccv_cnnp_model.h"
    8|       |
    9|       |// MARK - Level-5.5 API
   10|       |
   11|       |static int _ccv_cnnp_model_exec(const ccv_nnc_cmd_t cmd, const ccv_nnc_hint_t hint, const int flags, ccv_nnc_tensor_t* const* const inputs, const int input_size, ccv_nnc_tensor_t* const* const outputs, const int output_size, ccv_nnc_stream_context_t* const stream_context)
   12|  4.80k|{
   13|  4.80k|  ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
   14|  4.80k|  ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
   15|  4.80k|  // I cannot just use the stream context; it cannot synchronize correctly given the existing coroutine implementation.
   16|  4.80k|  int i;
   17|  4.80k|  int wait_for_any_neighbor = 0;
   18|  4.80k|  const int parallel_count = ccv_max(model->parallel_count, 1);
   19|  4.80k|  if (stream_context) // Find all neighbor contexts and wait on them all.
   20|    804|    for (i = 0; i < parallel_count; i++)
   21|    408|    {
   22|    408|      ccv_nnc_stream_context_t* const neighbor_context = ccv_nnc_stream_context_find_neighbor(stream_context, i);
   23|    408|      if (neighbor_context && neighbor_context != stream_context)
   24|      6|      {
   25|      6|        ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(neighbor_context);
   26|      6|        ccv_nnc_stream_context_wait_signal(stream_context, signal);
   27|      6|        wait_for_any_neighbor = 1;
   28|      6|      }
   29|    408|    }
   30|  4.80k|  co_scheduler_t* old_scheduler;
   31|  4.80k|  co_routine_t* old_main;
   32|  4.80k|  if (stream_context)
   33|    396|  {
   34|    396|    old_main = stream_context->main;
   35|    396|    old_scheduler = stream_context->scheduler;
   36|    396|    // We cannot piggyback on the old scheduler.
   37|    396|    stream_context->scheduler = 0;
   38|    396|    // We will have a new main coroutine when scheduled as the root.
   39|    396|    // Otherwise it would only be scheduled after all the existing routines have
   40|    396|    // scheduled out, and that won't be right.
   41|    396|    stream_context->main = 0;
   42|    396|  }
   43|  4.80k|  if (cmd.cmd == CCV_NNC_CUSTOM_FORWARD)
   44|  2.40k|  {
   45|  2.40k|    ccv_cnnp_model_evaluate(model, (ccv_cnnp_evaluate_param_t){
   46|  2.40k|      .requires_grad = stateful_exec->requires_grad,
   47|  2.40k|      .disable_outgrad = stateful_exec->disable_outgrad,
   48|  2.40k|      .is_test = stateful_exec->is_test,
   49|  2.40k|    }, inputs, input_size, outputs, output_size, 0, stream_context);
   50|  2.40k|  } else {
   51|  2.39k|    const int ingrad_size = model->output_size * parallel_count;
   52|  2.39k|    assert(ingrad_size <= input_size);
   53|  2.39k|    if (stateful_exec->disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_NONE)
   54|  2.39k|      ccv_cnnp_model_backward(model, inputs, ingrad_size, outputs, output_size, 0, stream_context);
   55|      2|    else if (stateful_exec->disable_outgrad == CCV_CNNP_DISABLE_OUTGRAD_ALL)
   56|      0|      ccv_cnnp_model_backward(model, inputs, ingrad_size, 0, 0, 0, stream_context);
   57|      2|    else {
   58|      2|      assert(output_size == model->input_size * parallel_count);
   59|      2|      int per_outgrad_size = 0;
   60|      2|      int i, j, k;
   61|      6|      for (i = 0; i < model->input_size; i++)
   62|      4|        if (!(stateful_exec->disable_outgrad & ((uint64_t)1 << i)))
   63|      2|          ++per_outgrad_size;
   64|      2|      assert(per_outgrad_size > 0);
   65|      2|      const int outgrad_size = per_outgrad_size * parallel_count;
   66|      2|      ccv_nnc_tensor_t* outgrads[outgrad_size];
   67|     10|      for (i = 0; i < parallel_count; i++)
   68|     24|        for (k = 0, j = 0; j < model->input_size; j++)
   69|     16|          if (!(stateful_exec->disable_outgrad & ((uint64_t)1 << j)))
   70|      8|            outgrads[(k++) + i * per_outgrad_size] = outputs[j + i * model->input_size];
   71|      2|      ccv_cnnp_model_backward(model, inputs, ingrad_size, outgrads, outgrad_size, 0, stream_context);
   72|      2|    }
   73|  2.39k|    stateful_exec->did_backward_but_not_apply_gradients = 1;
   74|  2.39k|  }
   75|  4.80k|  if (stream_context)
   76|    396|  {
   77|    396|    // Should have a new scheduler created.
   78|    396|    assert(stream_context->scheduler);
   79|    396|    // The new scheduler shouldn't be active (everything is scheduled).
   80|    396|    assert(!co_scheduler_is_active(stream_context->scheduler));
   81|    396|    co_scheduler_free(stream_context->scheduler);
   82|    396|    // Switch back to the old scheduler.
   83|    396|    stream_context->scheduler = old_scheduler;
   84|    396|    // The main coroutine should be cleared.
   85|    396|    assert(!stream_context->main);
   86|    396|    stream_context->main = old_main;
   87|    396|  }
   88|  4.80k|  if (wait_for_any_neighbor) // Make all neighbor contexts wait on this one.
   89|      2|  {
   90|      2|    assert(stream_context);
   91|      2|    ccv_nnc_stream_signal_t* const signal = ccv_nnc_stream_context_emit_signal_new(stream_context);
   92|     10|    for (i = 0; i < parallel_count; i++)
   93|      8|    {
   94|      8|      ccv_nnc_stream_context_t* const neighbor_context = ccv_nnc_stream_context_find_neighbor(stream_context, i);
   95|      8|      if (neighbor_context && neighbor_context != stream_context)
   96|      6|        ccv_nnc_stream_context_wait_signal(neighbor_context, signal);
   97|      8|    }
   98|      2|  }
   99|  4.80k|  return CCV_NNC_EXEC_SUCCESS;
  100|  4.80k|}
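Note: the two synchronization blocks above (lines 19-29 and 88-98) bracket the model execution: before running, the stream waits on every neighbor; afterwards, every neighbor waits on it. A minimal sketch of that emit/wait handshake in isolation, assuming `stream` and `neighbor` are two live `ccv_nnc_stream_context_t*` created elsewhere:

    // 1. Make `stream` wait for all work queued on `neighbor` so far:
    ccv_nnc_stream_signal_t* const before = ccv_nnc_stream_context_emit_signal_new(neighbor);
    ccv_nnc_stream_context_wait_signal(stream, before);
    // ... enqueue model execution on `stream` ...
    // 2. Make `neighbor` wait for the work just queued on `stream`:
    ccv_nnc_stream_signal_t* const after = ccv_nnc_stream_context_emit_signal_new(stream);
    ccv_nnc_stream_context_wait_signal(neighbor, after);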
  101|       |
  102|       |static void _ccv_cnnp_model_tensor_auto(const ccv_nnc_cmd_t cmd, const ccv_nnc_tensor_param_t* const inputs, const int input_size, const ccv_nnc_hint_t hint, ccv_nnc_tensor_param_t* const outputs, const int output_size)
  103|  4.40k|{
  104|  4.40k|  ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
  105|  4.40k|  ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
  106|  4.40k|  const int parallel_count = ccv_max(model->parallel_count, 1);
  107|  4.40k|  const int per_input_size = input_size / parallel_count;
  108|  4.40k|  assert(per_input_size > 0);
  109|  4.40k|  assert((input_size % parallel_count) == 0);
  110|  4.40k|  const int per_output_size = output_size / parallel_count;
  111|  4.40k|  assert(per_output_size > 0);
  112|  4.40k|  assert((output_size % parallel_count) == 0);
  113|  4.40k|  int i, j;
  114|  8.82k|  for (i = 0; i < parallel_count; i++)
  115|  4.42k|  {
  116|  4.42k|    ccv_cnnp_model_tensor_auto(model, outputs + i * per_output_size, per_output_size);
  117|  4.42k|    // Set device id to the corresponding inputs' device id.
  118|  4.42k|    const int device_id = CCV_TENSOR_GET_DEVICE_ID(inputs[i * per_input_size].type);
  119|  8.84k|    for (j = 0; j < per_output_size; j++)
  120|  4.42k|      CCV_TENSOR_SET_DEVICE_ID(outputs[i * per_output_size + j].type, device_id);
  121|  4.42k|  }
  122|  4.40k|}
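Note: this callback treats `inputs` and `outputs` as flattened arrays of `parallel_count` consecutive per-device slices. A small standalone illustration of that indexing convention, with hypothetical sizes:

    #include <stdio.h>

    int main(void)
    {
      // Hypothetical shape: 2 parallel devices, 3 outputs per device,
      // flattened into one array of 2 * 3 = 6 entries.
      const int parallel_count = 2, per_output_size = 3;
      int i, j;
      for (i = 0; i < parallel_count; i++)
        for (j = 0; j < per_output_size; j++)
          // Mirrors outputs[i * per_output_size + j] in the listing above.
          printf("device %d owns flat index %d\n", i, i * per_output_size + j);
      return 0;
    }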
  123|       |
  124|       |static void _ccv_cnnp_model_apply_gradients(const ccv_nnc_cmd_t cmd, ccv_nnc_stream_context_t* const stream_context)
  125|  2.39k|{
  126|  2.39k|  ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)cmd.data;
  127|  2.39k|  ccv_cnnp_model_t* const model = (ccv_cnnp_model_t*)stateful_exec->data;
  128|  2.39k|  ccv_cnnp_model_apply_gradients(model, stream_context);
  129|  2.39k|}
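Note: the `did_backward_but_not_apply_gradients` flag set in `_ccv_cnnp_model_exec` marks that a backward pass ran while optimizer updates are still pending; this callback pays that debt. At the model level the pairing looks like the sketch below, using only calls that appear in this file; `model`, the gradient arrays, and the sizes are assumed to be prepared by the caller:

    // Accumulate gradients with one or more backward passes (0: no tensor
    // tape, no stream context), then flush them into the parameters.
    ccv_cnnp_model_backward(model, ingrads, ingrad_size, outgrads, outgrad_size, 0, 0);
    ccv_cnnp_model_apply_gradients(model, 0); // 0: run synchronously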
  130|       |
  131|       |static ccv_nnc_stateful_cmd_vtab_t ccv_cnnp_model_exec_isa = {
  132|       |  .super = {
  133|       |    .exec = _ccv_cnnp_model_exec,
  134|       |    .tensor_auto = _ccv_cnnp_model_tensor_auto,
  135|       |  },
  136|       |  .apply_gradients = _ccv_cnnp_model_apply_gradients,
  137|       |};
  138|       |
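Note: this vtab is what turns a Level-5 model into a dynamic-graph command: `.exec` runs forward/backward, `.tensor_auto` infers output shapes, and `.apply_gradients` flushes pending optimizer updates. `ccv_nnc_dynamic_graph_evaluate` below wires it up as follows (condensed from lines 141 and 200 of the listing):

    // Wrap the model vtab in a custom command and point cmd.data at the
    // stateful execution record the callbacks above read back.
    ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, (ccv_nnc_cmd_vtab_t*)&ccv_cnnp_model_exec_isa, (ccv_nnc_cmd_param_t){}, 0);
    cmd.data = stateful_exec;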
  139|       |void ccv_nnc_dynamic_graph_evaluate(ccv_nnc_dynamic_graph_t* const dynamic_graph, ccv_cnnp_model_t* const model, const int is_test, const ccv_nnc_tensor_variable_t* const inputs, const int input_size, ccv_nnc_tensor_variable_t* const outputs, const int output_size, ccv_nnc_tensor_tape_t* const tensor_tape, ccv_nnc_stream_context_t* const stream_context)
  140|  2.40k|{
  141|  2.40k|  ccv_nnc_cmd_t cmd = ccv_nnc_cmd(CCV_NNC_CUSTOM_FORWARD, (ccv_nnc_cmd_vtab_t*)&ccv_cnnp_model_exec_isa, (ccv_nnc_cmd_param_t){}, 0);
  142|  2.40k|  assert(input_size > 0);
  143|  2.40k|  const int parallel_count = ccv_max(model->parallel_count, 1);
  144|  2.40k|  const int per_input_size = input_size / parallel_count;
  145|  2.40k|  assert(per_input_size > 0);
  146|  2.40k|  assert((input_size % parallel_count) == 0);
  147|  2.40k|  int i;
  148|  2.40k|  if (!model->graph)
  149|      5|  {
  150|      5|    ccv_nnc_tensor_param_t input_params[per_input_size];
  151|     13|    for (i = 0; i < per_input_size; i++)
  152|      8|      input_params[i] = inputs[i]->info;
  153|      5|    ccv_cnnp_model_compile(model, input_params, per_input_size, CMD_NOOP(), CMD_NOOP());
  154|  2.40k|  } else {
  155|  2.40k|    assert(per_input_size == model->input_size);
  156|  2.40k|    ccv_nnc_tensor_param_t input_params[per_input_size];
  157|  2.40k|    int flag = 0;
  158|  4.80k|    for (i = 0; i < per_input_size; i++)
  159|  2.40k|    {
  160|  2.40k|      input_params[i] = inputs[i]->info;
  161|  2.40k|      const ccv_nnc_tensor_param_t params = ccv_nnc_tensor_symbol_params(model->graph, model->inputs[i]);
  162|  2.40k|      // If these two parameters don't match, recompile the graph.
  163|  2.40k|      if (memcmp(&params, &input_params[i], sizeof(params)) != 0)
  164|  2.20k|        flag = 1;
  165|  2.40k|    }
  166|  2.40k|    if (flag) // Recompile the graph.
  167|  2.20k|      ccv_cnnp_model_compile(model, input_params, per_input_size, ccv_cnnp_model_minimizer(model), CMD_NOOP());
  168|  2.40k|  }
  169|  4.84k|  for (i = 0; i < input_size; i++)
  170|  2.43k|  {
  171|  2.43k|    // Cannot have the parameter be a partial tensor view for model evaluation.
  172|  2.43k|    ccv_nnc_tensor_t* const tensor = inputs[i] ? ccv_nnc_tensor_from_variable(dynamic_graph, inputs[i], stream_context) : 0;
  173|  2.43k|    if (tensor)
  174|  2.43k|      { assert(!CCV_IS_TENSOR_VIEW(tensor)); }
  175|  2.43k|  }
  176|  2.40k|  if (dynamic_graph->no_grad)
  177|      2|  {
  178|      2|    ccv_nnc_stateful_exec_t stateful_exec = {
  179|      2|      .requires_grad = 0,
  180|      2|      .is_test = is_test,
  181|      2|      .disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL,
  182|      2|      .tensor_tape = tensor_tape,
  183|      2|      .data = model
  184|      2|    };
  185|      2|    cmd.data = &stateful_exec;
  186|      2|    // The parallel parameter doesn't make sense here; parallelism is defined inside the model.
  187|      2|    ccv_nnc_dynamic_graph_exec_ret(dynamic_graph, cmd, ccv_nnc_no_hint, 0, inputs, input_size, outputs, output_size, 0, stream_context, 0);
  188|  2.40k|  } else {
  189|  2.40k|    uint64_t disable_outgrad = 0;
  190|  2.40k|    int count = 0;
  191|  4.81k|    for (i = 0; i < per_input_size; i++)
  192|  2.40k|      if (!inputs[i] || inputs[i]->type == CCV_NNC_TENSOR_CONSTANT)
  193|      3|      {
  194|      3|        disable_outgrad |= ((uint64_t)1 << i);
  195|      3|        ++count;
  196|      3|      }
  197|  2.40k|    if (count == per_input_size)
  198|      1|      disable_outgrad = CCV_CNNP_DISABLE_OUTGRAD_ALL;
  199|  2.40k|    ccv_nnc_stateful_exec_t* const stateful_exec = (ccv_nnc_stateful_exec_t*)ccmalloc(sizeof(ccv_nnc_stateful_exec_t));
  200|  2.40k|    cmd.data = stateful_exec;
  201|  2.40k|    stateful_exec->requires_grad = 1;
  202|  2.40k|    stateful_exec->is_test = is_test;
  203|  2.40k|    stateful_exec->did_backward_but_not_apply_gradients = 0;
  204|  2.40k|    stateful_exec->should_free = 0;
  205|  2.40k|    stateful_exec->disable_outgrad = disable_outgrad;
  206|  2.40k|    stateful_exec->tensor_tape = tensor_tape;
  207|  2.40k|    stateful_exec->data = model;
  208|  2.40k|    stateful_exec->cmd = cmd;
  209|  2.40k|    ccv_nnc_graph_exec_symbol_t symbol = {};
  210|  2.40k|    ccv_nnc_dynamic_graph_exec_ret(dynamic_graph, cmd, ccv_nnc_no_hint, 0, inputs, input_size, outputs, output_size, 0, stream_context, &symbol);
  211|  2.40k|    if (!symbol.graph) // This happens when the inputs are all constants.
  212|  2.40k|      ccfree(stateful_exec); // No one records it; no cmd.data refers to it.
  213|  2.40k|    else {
  214|  2.40k|      if (!dynamic_graph->stateful_execs)
  215|      8|      {
  216|      8|        dynamic_graph->stateful_execs = ccv_array_new(sizeof(ccv_nnc_stateful_exec_t*), 1, 0);
  217|      8|        ccv_array_push(dynamic_graph->stateful_execs, &stateful_exec);
  218|      8|        stateful_exec->index = dynamic_graph->stateful_execs->rnum - 1;
  219|  2.39k|      } else {
  220|  2.39k|        if (dynamic_graph->reuse_stateful_exec >= 0)
  221|  2.38k|        {
  222|  2.38k|          *(ccv_nnc_stateful_exec_t**)ccv_array_get(dynamic_graph->stateful_execs, dynamic_graph->reuse_stateful_exec) = stateful_exec;
  223|  2.38k|          stateful_exec->index = dynamic_graph->reuse_stateful_exec;
  224|  2.38k|          int flag = 0;
  225|  4.03k|          for (i = dynamic_graph->reuse_stateful_exec + 1; !flag && i < dynamic_graph->stateful_execs->rnum; i++)
  226|  1.64k|            if (*(ccv_nnc_stateful_exec_t**)ccv_array_get(dynamic_graph->stateful_execs, i) == 0)
  227|  1.64k|              dynamic_graph->reuse_stateful_exec = i, flag = 1;
  228|  2.38k|          if (!flag) // No free slot found; reset to -1.
  229|    742|            dynamic_graph->reuse_stateful_exec = -1;
  230|  2.38k|        } else {
  231|      9|          // Push new; no reuse available.
  232|      9|          ccv_array_push(dynamic_graph->stateful_execs, &stateful_exec);
  233|      9|          stateful_exec->index = dynamic_graph->stateful_execs->rnum - 1;
  234|      9|        }
  235|  2.39k|      }
  236|  2.40k|    }
  237|  2.40k|  }
  238|  2.40k|}
  239|       |
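Note: for context, a call into this entry point from user code might look like the sketch below. It assumes a `ccv_cnnp_model_t* model` constructed elsewhere (e.g. via the ccv_cnnp model builders) and uses the `CPU_TENSOR_NHWC` and `TENSOR_VARIABLE_LIST` convenience macros from ccv_nnc_easy.h; treat it as illustrative, not canonical.

    // Evaluate compiles the model on first use against the input shapes,
    // as seen in lines 148-153 above.
    ccv_nnc_dynamic_graph_t* const graph = ccv_nnc_dynamic_graph_new();
    ccv_nnc_tensor_variable_t const a = ccv_nnc_tensor_variable_new(graph, CPU_TENSOR_NHWC(32F, 2));
    ccv_nnc_tensor_from_variable(graph, a)->data.f32[0] = 1;
    ccv_nnc_tensor_from_variable(graph, a)->data.f32[1] = 2;
    ccv_nnc_tensor_variable_t const b = ccv_nnc_tensor_variable_new(graph);
    ccv_nnc_dynamic_graph_evaluate(graph, model, 1 /* is_test */, TENSOR_VARIABLE_LIST(a), TENSOR_VARIABLE_LIST(b), 0 /* tensor_tape */, 0 /* stream_context */);
    ccv_nnc_dynamic_graph_free(graph);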