/* darknet headers providing the layer type and the BLAS/CUDA helpers used below */
#include "rnn_layer.h"
#include "connected_layer.h"
#include "blas.h"
#include "cuda.h"

#include <stdlib.h>
#include <string.h>
/* Advance a sublayer's per-timestep views: each timestep occupies
   outputs*batch floats in the step-major output/delta buffers. */
static void increment_layer(layer *l, int steps)
{
    int num = l->outputs*l->batch*steps;
    l->output += num;
    l->delta += num;
    l->x += num;
    l->x_norm += num;

#ifdef GPU
    l->output_gpu += num;
    l->delta_gpu += num;
    l->x_gpu += num;
    l->x_norm_gpu += num;
#endif
}
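/* Illustrative arithmetic (not in the original source): for a sublayer with
   outputs = 16 and batch = 4, increment_layer(&sub, 7) advances the views by
   16*4*7 = 448 floats, i.e. to timestep 7; increment_layer(&sub, -1) then
   walks back one timestep at a time, which is exactly how
   backward_rnn_layer_gpu() below iterates. */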
layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log)
{
    /* ... construction of l and of its input_layer, self_layer and
       output_layer connected sublayers elided ... */

    l.output = l.output_layer->output;
    l.delta = l.output_layer->delta;

    l.forward = forward_rnn_layer;
    l.backward = backward_rnn_layer;
    l.update = update_rnn_layer;
#ifdef GPU
    l.forward_gpu = forward_rnn_layer_gpu;
    l.backward_gpu = backward_rnn_layer_gpu;
    l.update_gpu = update_rnn_layer_gpu;
    l.state_gpu = cuda_make_array(l.state, batch*hidden*(steps+1));
    l.output_gpu = l.output_layer->output_gpu;
    l.delta_gpu = l.output_layer->delta_gpu;
#endif

    return l;
}
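/* Layout sketch (illustrative, not part of the original file): the state
   buffer allocated above holds steps+1 slices of batch*hidden floats;
   slice 0 is the initial hidden state and slice t+1 the state written by
   timestep t. A hypothetical helper to address one slice: */
static inline float *rnn_state_slice(float *state, int batch, int hidden, int t)
{
    return state + (size_t)batch*hidden*t;  /* slice t of the steps+1 slices */
}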
void backward_rnn_layer_gpu(layer l, network_state state)
{
    network_state s = {0};
    s.train = state.train;
    int i;
    layer input_layer = *(l.input_layer);
    layer self_layer = *(l.self_layer);
    layer output_layer = *(l.output_layer);

    /* start all three sublayer views at the last timestep */
    increment_layer(&input_layer, l.steps - 1);
    increment_layer(&self_layer, l.steps - 1);
    increment_layer(&output_layer, l.steps - 1);
    l.state_gpu += l.hidden*l.batch*l.steps;
    for (i = l.steps-1; i >= 0; --i) {
        copy_ongpu(l.hidden * l.batch, input_layer.output_gpu, 1, l.state_gpu, 1);
        axpy_ongpu(l.hidden * l.batch, 1, self_layer.output_gpu, 1, l.state_gpu, 1);
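        /* Note (added): this rebuilds the state slice that output_layer
           consumed at step i as the sum of the input and recurrent sublayer
           outputs, rather than re-reading the stored forward state. */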

        s.input = l.state_gpu;
        s.delta = self_layer.delta_gpu;
        backward_connected_layer_gpu(output_layer, s);

        l.state_gpu -= l.hidden*l.batch;

        copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);

        s.input = l.state_gpu;
        s.delta = self_layer.delta_gpu - l.hidden*l.batch;
        if (i == 0) s.delta = 0;
        backward_connected_layer_gpu(self_layer, s);
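        /* Note (added): s.delta points at the previous timestep's delta
           slot, so the recurrent gradient accumulates into step i-1; at
           i == 0 there is no earlier step, hence s.delta = 0 above. */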

        //copy_ongpu(l.hidden*l.batch, self_layer.delta_gpu, 1, input_layer.delta_gpu, 1);
        if (i > 0 && l.shortcut) axpy_ongpu(l.hidden*l.batch, 1, self_layer.delta_gpu, 1, self_layer.delta_gpu - l.hidden*l.batch, 1);
        s.input = state.input + i*l.inputs*l.batch;
        if(state.delta) s.delta = state.delta + i*l.inputs*l.batch;
        else s.delta = 0;
        backward_connected_layer_gpu(input_layer, s);

        /* step all three sublayer views back one timestep */
        increment_layer(&input_layer, -1);
        increment_layer(&self_layer, -1);
        increment_layer(&output_layer, -1);
    }
}
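/* Usage sketch (illustrative; the parameter values and surrounding setup are
   hypothetical, not from this file):

       layer l = make_rnn_layer(32, 256, 512, 256, 8, TANH, 0, 0);
       ...
       l.backward_gpu(l, state);   // runs the BPTT loop above

   backward_gpu walks the steps in reverse, so gradients flow from the last
   timestep's output layer back through the recurrent self connections. */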