From d7a30ada7e82daf7ce5ca91acac15c1723188fc5 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Mon, 16 Jan 2017 09:51:42 +0000
Subject: [PATCH] Fixed behavior if missing library cudnn.lib
---
src/normalization_layer.c | 150 ++++++++++++++++++++++++++++++++++++++++++++++++++
1 files changed, 150 insertions(+), 0 deletions(-)
diff --git a/src/normalization_layer.c b/src/normalization_layer.c
new file mode 100644
index 0000000..069a079
--- /dev/null
+++ b/src/normalization_layer.c
@@ -0,0 +1,150 @@
+#include "normalization_layer.h"
+#include "blas.h"
+#include <stdio.h>
+
+layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa)
+{
+ fprintf(stderr, "Local Response Normalization Layer: %d x %d x %d image, %d size\n", w,h,c,size);
+ layer layer = {0};
+ layer.type = NORMALIZATION;
+ layer.batch = batch;
+ layer.h = layer.out_h = h;
+ layer.w = layer.out_w = w;
+ layer.c = layer.out_c = c;
+ layer.kappa = kappa;
+ layer.size = size;
+ layer.alpha = alpha;
+ layer.beta = beta;
+ layer.output = calloc(h * w * c * batch, sizeof(float));
+ layer.delta = calloc(h * w * c * batch, sizeof(float));
+ layer.squared = calloc(h * w * c * batch, sizeof(float));
+ layer.norms = calloc(h * w * c * batch, sizeof(float));
+ layer.inputs = w*h*c;
+ layer.outputs = layer.inputs;
+
+ layer.forward = forward_normalization_layer;
+ layer.backward = backward_normalization_layer;
+ #ifdef GPU
+ layer.forward_gpu = forward_normalization_layer_gpu;
+ layer.backward_gpu = backward_normalization_layer_gpu;
+
+ layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
+ layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);
+ layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch);
+ layer.norms_gpu = cuda_make_array(layer.norms, h * w * c * batch);
+ #endif
+ return layer;
+}
+
+void resize_normalization_layer(layer *layer, int w, int h)
+{
+ int c = layer->c;
+ int batch = layer->batch;
+ layer->h = h;
+ layer->w = w;
+ layer->out_h = h;
+ layer->out_w = w;
+ layer->inputs = w*h*c;
+ layer->outputs = layer->inputs;
+ layer->output = realloc(layer->output, h * w * c * batch * sizeof(float));
+ layer->delta = realloc(layer->delta, h * w * c * batch * sizeof(float));
+ layer->squared = realloc(layer->squared, h * w * c * batch * sizeof(float));
+ layer->norms = realloc(layer->norms, h * w * c * batch * sizeof(float));
+#ifdef GPU
+ cuda_free(layer->output_gpu);
+ cuda_free(layer->delta_gpu);
+ cuda_free(layer->squared_gpu);
+ cuda_free(layer->norms_gpu);
+ layer->output_gpu = cuda_make_array(layer->output, h * w * c * batch);
+ layer->delta_gpu = cuda_make_array(layer->delta, h * w * c * batch);
+ layer->squared_gpu = cuda_make_array(layer->squared, h * w * c * batch);
+ layer->norms_gpu = cuda_make_array(layer->norms, h * w * c * batch);
+#endif
+}
+
+void forward_normalization_layer(const layer layer, network_state state)
+{
+ int k,b;
+ int w = layer.w;
+ int h = layer.h;
+ int c = layer.c;
+ scal_cpu(w*h*c*layer.batch, 0, layer.squared, 1);
+
+ for(b = 0; b < layer.batch; ++b){
+ float *squared = layer.squared + w*h*c*b;
+ float *norms = layer.norms + w*h*c*b;
+ float *input = state.input + w*h*c*b;
+ pow_cpu(w*h*c, 2, input, 1, squared, 1);
+
+ const_cpu(w*h, layer.kappa, norms, 1);
+ for(k = 0; k < layer.size/2; ++k){
+ axpy_cpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
+ }
+
+ for(k = 1; k < layer.c; ++k){
+ copy_cpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
+ int prev = k - ((layer.size-1)/2) - 1;
+ int next = k + (layer.size/2);
+ if(prev >= 0) axpy_cpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
+ if(next < layer.c) axpy_cpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
+ }
+ }
+ pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, layer.output, 1);
+ mul_cpu(w*h*c*layer.batch, state.input, 1, layer.output, 1);
+}
+
+void backward_normalization_layer(const layer layer, network_state state)
+{
+ // TODO This is approximate ;-)
+ // Also this should add in to delta instead of overwritting.
+
+ int w = layer.w;
+ int h = layer.h;
+ int c = layer.c;
+ pow_cpu(w*h*c*layer.batch, -layer.beta, layer.norms, 1, state.delta, 1);
+ mul_cpu(w*h*c*layer.batch, layer.delta, 1, state.delta, 1);
+}
+
+#ifdef GPU
+void forward_normalization_layer_gpu(const layer layer, network_state state)
+{
+ int k,b;
+ int w = layer.w;
+ int h = layer.h;
+ int c = layer.c;
+ scal_ongpu(w*h*c*layer.batch, 0, layer.squared_gpu, 1);
+
+ for(b = 0; b < layer.batch; ++b){
+ float *squared = layer.squared_gpu + w*h*c*b;
+ float *norms = layer.norms_gpu + w*h*c*b;
+ float *input = state.input + w*h*c*b;
+ pow_ongpu(w*h*c, 2, input, 1, squared, 1);
+
+ const_ongpu(w*h, layer.kappa, norms, 1);
+ for(k = 0; k < layer.size/2; ++k){
+ axpy_ongpu(w*h, layer.alpha, squared + w*h*k, 1, norms, 1);
+ }
+
+ for(k = 1; k < layer.c; ++k){
+ copy_ongpu(w*h, norms + w*h*(k-1), 1, norms + w*h*k, 1);
+ int prev = k - ((layer.size-1)/2) - 1;
+ int next = k + (layer.size/2);
+ if(prev >= 0) axpy_ongpu(w*h, -layer.alpha, squared + w*h*prev, 1, norms + w*h*k, 1);
+ if(next < layer.c) axpy_ongpu(w*h, layer.alpha, squared + w*h*next, 1, norms + w*h*k, 1);
+ }
+ }
+ pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, layer.output_gpu, 1);
+ mul_ongpu(w*h*c*layer.batch, state.input, 1, layer.output_gpu, 1);
+}
+
+void backward_normalization_layer_gpu(const layer layer, network_state state)
+{
+ // TODO This is approximate ;-)
+
+ int w = layer.w;
+ int h = layer.h;
+ int c = layer.c;
+ pow_ongpu(w*h*c*layer.batch, -layer.beta, layer.norms_gpu, 1, state.delta, 1);
+ mul_ongpu(w*h*c*layer.batch, layer.delta_gpu, 1, state.delta, 1);
+}
+#endif
--
Gitblit v1.10.0