| | |
| | | { |
| | | int i, j; |
| | | for(i = 0; i < l.outputs; ++i){ |
| | | float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); |
| | | float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001); |
| | | for(j = 0; j < l.inputs; ++j){ |
| | | l.weights[i*l.inputs + j] *= scale; |
| | | } |
| | | l.biases[i] -= l.rolling_mean[i] * scale; |
| | | l.scales[i] = 1; |
| | | l.rolling_mean[i] = 0; |
| | | l.rolling_variance[i] = 1; |
| | | } |
| | | } |
| | | |
| | | |
| | | void statistics_connected_layer(layer l) |
| | | { |
| | | if(l.batch_normalize){ |
| | | printf("Scales "); |
| | | print_statistics(l.scales, l.outputs); |
| | | printf("Rolling Mean "); |
| | | print_statistics(l.rolling_mean, l.outputs); |
| | | printf("Rolling Variance "); |
| | | print_statistics(l.rolling_variance, l.outputs); |
| | | } |
| | | printf("Biases "); |
| | | print_statistics(l.biases, l.outputs); |
| | | printf("Weights "); |
| | | print_statistics(l.weights, l.outputs); |
| | | } |
| | | |
| | | #ifdef GPU |
| | | |
| | | void pull_connected_layer(connected_layer l) |
| | |
| | | axpy_ongpu(l.outputs, 1, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1); |
| | | } |
| | | activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation); |
| | | |
| | | } |
| | | |
| | | void backward_connected_layer_gpu(connected_layer l, network_state state) |
| | | { |
| | | int i; |
| | | constrain_ongpu(l.outputs*l.batch, 5, l.delta_gpu, 1); |
| | | constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1); |
| | | gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu); |
| | | for(i = 0; i < l.batch; ++i){ |
| | | axpy_ongpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1); |