From 9b1774bd39d65614cdbd2d4e3815086298008911 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Wed, 06 Nov 2013 18:37:37 +0000
Subject: [PATCH] Connected layers work forward and backward!
---
 src/network.c             | 73 ++++++++++++
 src/image.c               |  2
 .gitignore                |  1
 src/network.h             |  3
 Makefile                  |  4
 src/convolutional_layer.c | 29 +++-
 src/activations.h         | 10 +
 src/connected_layer.c     | 86 +++++++------
 src/connected_layer.h     | 14 +
 src/activations.c         | 32 +++++
 src/convolutional_layer.h |  3
 src/tests.c               | 61 ++++++++-
 12 files changed, 248 insertions(+), 70 deletions(-)
diff --git a/.gitignore b/.gitignore
index da9802e..d15b2e8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,6 +3,7 @@
*.csv
images/
opencv/
+convnet/
cnn
# OS Generated #
diff --git a/Makefile b/Makefile
index 4dce124..4cddfd5 100644
--- a/Makefile
+++ b/Makefile
@@ -1,10 +1,10 @@
CC=gcc
CFLAGS=-Wall `pkg-config --cflags opencv` -O3 -flto -ffast-math
-#CFLAGS=-Wall `pkg-config --cflags opencv` -O0 -g
+CFLAGS=-Wall `pkg-config --cflags opencv` -O0 -g
LDFLAGS=`pkg-config --libs opencv` -lm
VPATH=./src/
-OBJ=network.o image.o tests.o convolutional_layer.o connected_layer.o maxpool_layer.o
+OBJ=network.o image.o tests.o convolutional_layer.o connected_layer.o maxpool_layer.o activations.o
all: cnn
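With both CFLAGS lines present, make's last assignment wins, so uncommenting the second line switches the whole build to an unoptimized debug build while keeping the optimized line in place as a toggle:

CFLAGS=-Wall `pkg-config --cflags opencv` -O3 -flto -ffast-math
CFLAGS=-Wall `pkg-config --cflags opencv` -O0 -g   # last assignment wins: the build compiles with -O0 -g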
diff --git a/src/activations.c b/src/activations.c
new file mode 100644
index 0000000..aef21cb
--- /dev/null
+++ b/src/activations.c
@@ -0,0 +1,32 @@
+#include "activations.h"
+
+#include <math.h>
+
+double identity_activation(double x)
+{
+ return x;
+}
+double identity_gradient(double x)
+{
+ return 1;
+}
+
+double relu_activation(double x)
+{
+ return x*(x>0);
+}
+double relu_gradient(double x)
+{
+ return (x>=0);
+}
+
+double sigmoid_activation(double x)
+{
+ return 1./(1.+exp(-x));
+}
+
+double sigmoid_gradient(double x)
+{
+ return x*(1.-x);
+}
+
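A note on the gradient convention these functions assume: the backward passes below call the gradient on stored post-activation values, so sigmoid_gradient takes the *output* s of sigmoid_activation and returns s*(1-s). (By the same convention, relu_gradient sees only non-negative values; with the >= comparison it returns 1 even for clamped units, where the true derivative is 0.) A standalone sketch, not part of the patch, checking the sigmoid convention against a central finite difference:

#include <stdio.h>
#include "activations.h"

int main()
{
    double x = 0.3, eps = 1e-5;
    double s = sigmoid_activation(x);
    /* the gradient takes the output s, not the input x */
    double analytic = sigmoid_gradient(s);
    double numeric = (sigmoid_activation(x + eps) - sigmoid_activation(x - eps)) / (2*eps);
    printf("analytic %f numeric %f\n", analytic, numeric);   /* both ~0.2445 */
    return 0;
}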
diff --git a/src/activations.h b/src/activations.h
new file mode 100644
index 0000000..294cf28
--- /dev/null
+++ b/src/activations.h
@@ -0,0 +1,10 @@
+typedef enum{
+ SIGMOID, RELU, IDENTITY
+}ACTIVATOR_TYPE;
+
+double relu_activation(double x);
+double relu_gradient(double x);
+double sigmoid_activation(double x);
+double sigmoid_gradient(double x);
+double identity_activation(double x);
+double identity_gradient(double x);
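The new header has no include guard; connected_layer.h includes it, and any translation unit that ends up including it twice would hit enum/typedef redefinition errors. A guarded version, following the pattern of the other headers in src/:

#ifndef ACTIVATIONS_H
#define ACTIVATIONS_H

typedef enum{
    SIGMOID, RELU, IDENTITY
}ACTIVATOR_TYPE;

double relu_activation(double x);
double relu_gradient(double x);
double sigmoid_activation(double x);
double sigmoid_gradient(double x);
double identity_activation(double x);
double identity_gradient(double x);

#endif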
diff --git a/src/connected_layer.c b/src/connected_layer.c
index fe904ba..11143b9 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -1,19 +1,10 @@
#include "connected_layer.h"
+#include <math.h>
#include <stdlib.h>
#include <string.h>
-double activation(double x)
-{
- return x*(x>0);
-}
-
-double gradient(double x)
-{
- return (x>=0);
-}
-
-connected_layer make_connected_layer(int inputs, int outputs)
+connected_layer make_connected_layer(int inputs, int outputs, ACTIVATOR_TYPE activator)
{
int i;
connected_layer layer;
@@ -32,6 +23,17 @@
for(i = 0; i < outputs; ++i)
layer.biases[i] = (double)rand()/RAND_MAX;
+ if(activator == SIGMOID){
+ layer.activation = sigmoid_activation;
+ layer.gradient = sigmoid_gradient;
+ }else if(activator == RELU){
+ layer.activation = relu_activation;
+ layer.gradient = relu_gradient;
+ }else if(activator == IDENTITY){
+ layer.activation = identity_activation;
+ layer.gradient = identity_gradient;
+ }
+
return layer;
}
@@ -41,39 +43,16 @@
for(i = 0; i < layer.outputs; ++i){
layer.output[i] = layer.biases[i];
for(j = 0; j < layer.inputs; ++j){
- layer.output[i] += input[j]*layer.weights[i*layer.outputs + j];
+ layer.output[i] += input[j]*layer.weights[i*layer.inputs + j];
}
- layer.output[i] = activation(layer.output[i]);
+ layer.output[i] = layer.activation(layer.output[i]);
}
}
-void backpropagate_connected_layer(double *input, connected_layer layer)
+void learn_connected_layer(double *input, connected_layer layer)
{
- int i, j;
- double *old_input = calloc(layer.inputs, sizeof(double));
- memcpy(old_input, input, layer.inputs*sizeof(double));
- memset(input, 0, layer.inputs*sizeof(double));
-
- for(i = 0; i < layer.outputs; ++i){
- for(j = 0; j < layer.inputs; ++j){
- input[j] += layer.output[i]*layer.weights[i*layer.outputs + j];
- }
- }
- for(j = 0; j < layer.inputs; ++j){
- input[j] = input[j]*gradient(old_input[j]);
- }
- free(old_input);
-}
-
-void calculate_updates_connected_layer(double *input, connected_layer layer)
-{
- int i, j;
- for(i = 0; i < layer.outputs; ++i){
- layer.bias_updates[i] += layer.output[i];
- for(j = 0; j < layer.inputs; ++j){
- layer.weight_updates[i*layer.outputs + j] += layer.output[i]*input[j];
- }
- }
+ calculate_update_connected_layer(input, layer);
+ backpropagate_connected_layer(input, layer);
}
void update_connected_layer(connected_layer layer, double step)
@@ -82,11 +61,36 @@
for(i = 0; i < layer.outputs; ++i){
layer.biases[i] += step*layer.bias_updates[i];
for(j = 0; j < layer.inputs; ++j){
- int index = i*layer.outputs+j;
- layer.weights[index] = layer.weight_updates[index];
+ int index = i*layer.inputs+j;
+ layer.weights[index] += step*layer.weight_updates[index];
}
}
memset(layer.bias_updates, 0, layer.outputs*sizeof(double));
memset(layer.weight_updates, 0, layer.outputs*layer.inputs*sizeof(double));
}
+void calculate_update_connected_layer(double *input, connected_layer layer)
+{
+ int i, j;
+ for(i = 0; i < layer.outputs; ++i){
+ layer.bias_updates[i] += layer.output[i];
+ for(j = 0; j < layer.inputs; ++j){
+ layer.weight_updates[i*layer.inputs + j] += layer.output[i]*input[j];
+ }
+ }
+}
+
+void backpropagate_connected_layer(double *input, connected_layer layer)
+{
+ int i, j;
+
+ for(j = 0; j < layer.inputs; ++j){
+ double grad = layer.gradient(input[j]);
+ input[j] = 0;
+ for(i = 0; i < layer.outputs; ++i){
+ input[j] += layer.output[i]*layer.weights[i*layer.inputs + j];
+ }
+ input[j] *= grad;
+ }
+}
+
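Two things are worth noting in this file. First, the indexing fix: weights are now addressed as i*layer.inputs + j, a row-major matrix with one row of `inputs` weights per output unit; the old i*layer.outputs + j stride made rows overlap, or run past the allocation, whenever inputs != outputs. Second, ordering matters in learn_connected_layer: updates must be accumulated before backpropagation, because backpropagate_connected_layer overwrites `input` in place with the delta for the layer below. A hypothetical driver showing the calling convention (the truth-minus-prediction error convention is taken from test_ann in tests.c):

#include "connected_layer.h"

int main()
{
    connected_layer l = make_connected_layer(2, 1, SIGMOID);
    double x[2] = {0.5, -0.25};

    run_connected_layer(x, l);        /* forward: output[i] = act(bias[i] + sum_j W[i][j]*x[j]) */
    l.output[0] = 1.0 - l.output[0];  /* overwrite the output with the error signal */
    learn_connected_layer(x, l);      /* accumulate updates, then overwrite x with the input delta */
    update_connected_layer(l, 0.01);  /* apply and zero the accumulated updates */
    return 0;
}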
diff --git a/src/connected_layer.h b/src/connected_layer.h
index e403b0f..4f0e42c 100644
--- a/src/connected_layer.h
+++ b/src/connected_layer.h
@@ -1,6 +1,8 @@
#ifndef CONNECTED_LAYER_H
#define CONNECTED_LAYER_H
+#include "activations.h"
+
typedef struct{
int inputs;
int outputs;
@@ -9,13 +11,19 @@
double *weight_updates;
double *bias_updates;
double *output;
+
+ double (* activation)();
+ double (* gradient)();
} connected_layer;
-connected_layer make_connected_layer(int inputs, int outputs);
+connected_layer make_connected_layer(int inputs, int outputs, ACTIVATOR_TYPE activator);
+
void run_connected_layer(double *input, connected_layer layer);
-void backpropagate_connected_layer(double *input, connected_layer layer);
-void calculate_updates_connected_layer(double *input, connected_layer layer);
+void learn_connected_layer(double *input, connected_layer layer);
void update_connected_layer(connected_layer layer, double step);
+void backpropagate_connected_layer(double *input, connected_layer layer);
+void calculate_update_connected_layer(double *input, connected_layer layer);
+
#endif
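The function-pointer members are declared with empty parameter lists, which in pre-C99 C means "unspecified arguments" rather than "no arguments"; the calls still compile against the double(double) activations, but prototyped declarations would let the compiler check them:

double (* activation)(double);
double (* gradient)(double);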
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index f83622b..8053133 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -33,12 +33,12 @@
for(i = 0; i < layer.n; ++i){
convolve(input, layer.kernels[i], layer.stride, i, layer.output);
}
- for(i = 0; i < input.h*input.w*input.c; ++i){
- input.data[i] = convolution_activation(input.data[i]);
+ for(i = 0; i < layer.output.h*layer.output.w*layer.output.c; ++i){
+ layer.output.data[i] = convolution_activation(layer.output.data[i]);
}
}
-void backpropagate_layer(image input, convolutional_layer layer)
+void backpropagate_convolutional_layer(image input, convolutional_layer layer)
{
int i;
zero_image(input);
@@ -47,7 +47,7 @@
}
}
-void backpropagate_layer_convolve(image input, convolutional_layer layer)
+void backpropagate_convolutional_layer_convolve(image input, convolutional_layer layer)
{
int i,j;
for(i = 0; i < layer.n; ++i){
@@ -67,20 +67,29 @@
}
}
-void error_convolutional_layer(image input, convolutional_layer layer)
+void learn_convolutional_layer(image input, convolutional_layer layer)
{
int i;
for(i = 0; i < layer.n; ++i){
kernel_update(input, layer.kernel_updates[i], layer.stride, i, layer.output);
}
image old_input = copy_image(input);
- zero_image(input);
- for(i = 0; i < layer.n; ++i){
- back_convolve(input, layer.kernels[i], layer.stride, i, layer.output);
- }
+ backpropagate_convolutional_layer(input, layer);
for(i = 0; i < input.h*input.w*input.c; ++i){
- input.data[i] = input.data[i]*convolution_gradient(input.data[i]);
+ input.data[i] *= convolution_gradient(old_input.data[i]);
}
free_image(old_input);
}
+void update_convolutional_layer(convolutional_layer layer, double step)
+{
+ int i,j;
+ for(i = 0; i < layer.n; ++i){
+ int pixels = layer.kernels[i].h*layer.kernels[i].w*layer.kernels[i].c;
+ for(j = 0; j < pixels; ++j){
+ layer.kernels[i].data[j] += step*layer.kernel_updates[i].data[j];
+ }
+ zero_image(layer.kernel_updates[i]);
+ }
+}
+
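One loose end: update_convolutional_layer is called from the new update_network in network.c, but this patch does not declare it in convolutional_layer.h, so that call only compiles through an implicit (pre-C99) declaration. The matching prototype would be:

void update_convolutional_layer(convolutional_layer layer, double step);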
diff --git a/src/convolutional_layer.h b/src/convolutional_layer.h
index b42f5e9..2428715 100644
--- a/src/convolutional_layer.h
+++ b/src/convolutional_layer.h
@@ -14,8 +14,7 @@
convolutional_layer make_convolutional_layer(int w, int h, int c, int n, int size, int stride);
void run_convolutional_layer(const image input, const convolutional_layer layer);
-void backpropagate_layer(image input, convolutional_layer layer);
-void backpropagate_layer_convolve(image input, convolutional_layer layer);
+void learn_convolutional_layer(image input, convolutional_layer layer);
#endif
diff --git a/src/image.c b/src/image.c
index 13c6b31..a1aa8a7 100644
--- a/src/image.c
+++ b/src/image.c
@@ -132,7 +132,7 @@
image out = make_image(h,w,c);
int i;
for(i = 0; i < h*w*c; ++i){
- out.data[i] = (double)rand()/RAND_MAX;
+ out.data[i] = .5-(double)rand()/RAND_MAX;
}
return out;
}
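The changed initializer recenters random images around zero: u = (double)rand()/RAND_MAX is uniform on [0, 1], so the stored value .5 - u is uniform on [-0.5, 0.5] with mean roughly zero. In miniature:

double u = (double)rand()/RAND_MAX;   /* uniform on [0, 1] */
double v = .5 - u;                    /* uniform on [-0.5, 0.5] */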
diff --git a/src/network.c b/src/network.c
index e55535c..0a74b63 100644
--- a/src/network.c
+++ b/src/network.c
@@ -8,7 +8,7 @@
void run_network(image input, network net)
{
int i;
- double *input_d = 0;
+ double *input_d = input.data;
for(i = 0; i < net.n; ++i){
if(net.types[i] == CONVOLUTIONAL){
convolutional_layer layer = *(convolutional_layer *)net.layers[i];
@@ -30,6 +30,77 @@
}
}
+void update_network(network net, double step)
+{
+ int i;
+ for(i = 0; i < net.n; ++i){
+ if(net.types[i] == CONVOLUTIONAL){
+ convolutional_layer layer = *(convolutional_layer *)net.layers[i];
+ update_convolutional_layer(layer, step);
+ }
+ else if(net.types[i] == MAXPOOL){
+ //maxpool_layer layer = *(maxpool_layer *)net.layers[i];
+ }
+ else if(net.types[i] == CONNECTED){
+ connected_layer layer = *(connected_layer *)net.layers[i];
+ update_connected_layer(layer, step);
+ }
+ }
+}
+
+void learn_network(image input, network net)
+{
+ int i;
+ image prev;
+ double *prev_p;
+ for(i = net.n-1; i >= 0; --i){
+ if(i == 0){
+ prev = input;
+ prev_p = prev.data;
+ } else if(net.types[i-1] == CONVOLUTIONAL){
+ convolutional_layer layer = *(convolutional_layer *)net.layers[i-1];
+ prev = layer.output;
+ prev_p = prev.data;
+ } else if(net.types[i-1] == MAXPOOL){
+ maxpool_layer layer = *(maxpool_layer *)net.layers[i-1];
+ prev = layer.output;
+ prev_p = prev.data;
+ } else if(net.types[i-1] == CONNECTED){
+ connected_layer layer = *(connected_layer *)net.layers[i-1];
+ prev_p = layer.output;
+ }
+
+ if(net.types[i] == CONVOLUTIONAL){
+ convolutional_layer layer = *(convolutional_layer *)net.layers[i];
+ learn_convolutional_layer(prev, layer);
+ }
+ else if(net.types[i] == MAXPOOL){
+ //maxpool_layer layer = *(maxpool_layer *)net.layers[i];
+ }
+ else if(net.types[i] == CONNECTED){
+ connected_layer layer = *(connected_layer *)net.layers[i];
+ learn_connected_layer(prev_p, layer);
+ }
+ }
+}
+
+double *get_network_output(network net)
+{
+ int i = net.n-1;
+ if(net.types[i] == CONVOLUTIONAL){
+ convolutional_layer layer = *(convolutional_layer *)net.layers[i];
+ return layer.output.data;
+ }
+ else if(net.types[i] == MAXPOOL){
+ maxpool_layer layer = *(maxpool_layer *)net.layers[i];
+ return layer.output.data;
+ }
+ else if(net.types[i] == CONNECTED){
+ connected_layer layer = *(connected_layer *)net.layers[i];
+ return layer.output;
+ }
+ return 0;
+}
image get_network_image(network net)
{
int i;
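learn_network walks the layers in reverse: for layer i it locates the previous layer's stored output (the input layer i saw on the forward pass), while layer i's own output buffer holds the error signal written by the caller; each learn_* call consumes both and leaves the delta for layer i-1 in the previous layer's output. A minimal sketch of the resulting training contract (a hypothetical helper, not in the patch; it mirrors the loop in test_ann):

/* assumes network.h and a single-output network, as in test_ann */
void train_step(image in, network net, double truth, double step)
{
    run_network(in, net);                  /* forward pass */
    double *out = get_network_output(net);
    out[0] = truth - out[0];               /* write the error into the output buffer */
    learn_network(in, net);                /* backward pass: deltas flow down, updates accumulate */
    update_network(net, step);             /* apply and zero the accumulated updates */
}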
diff --git a/src/network.h b/src/network.h
index 826eafa..2fb9225 100644
--- a/src/network.h
+++ b/src/network.h
@@ -17,6 +17,9 @@
} network;
void run_network(image input, network net);
+double *get_network_output(network net);
+void learn_network(image input, network net);
+void update_network(network net, double step);
image get_network_image(network net);
#endif
diff --git a/src/tests.c b/src/tests.c
index 7e2539a..f2b50dc 100644
--- a/src/tests.c
+++ b/src/tests.c
@@ -34,11 +34,11 @@
void test_convolutional_layer()
{
srand(0);
- image dog = load_image("test_dog.jpg");
+ image dog = load_image("dog.jpg");
int i;
- int n = 5;
+ int n = 3;
int stride = 1;
- int size = 8;
+ int size = 3;
convolutional_layer layer = make_convolutional_layer(dog.h, dog.w, dog.c, n, size, stride);
char buff[256];
for(i = 0; i < n; ++i) {
@@ -47,7 +47,7 @@
}
run_convolutional_layer(dog, layer);
- maxpool_layer mlayer = make_maxpool_layer(layer.output.h, layer.output.w, layer.output.c, 3);
+ maxpool_layer mlayer = make_maxpool_layer(layer.output.h, layer.output.w, layer.output.c, 2);
run_maxpool_layer(layer.output,mlayer);
show_image_layers(mlayer.output, "Test Maxpool Layer");
@@ -128,9 +128,9 @@
n = 128;
convolutional_layer cl5 = make_convolutional_layer(cl4.output.h, cl4.output.w, cl4.output.c, n, size, stride);
maxpool_layer ml3 = make_maxpool_layer(cl5.output.h, cl5.output.w, cl5.output.c, 4);
- connected_layer nl = make_connected_layer(ml3.output.h*ml3.output.w*ml3.output.c, 4096);
- connected_layer nl2 = make_connected_layer(4096, 4096);
- connected_layer nl3 = make_connected_layer(4096, 1000);
+ connected_layer nl = make_connected_layer(ml3.output.h*ml3.output.w*ml3.output.c, 4096, RELU);
+ connected_layer nl2 = make_connected_layer(4096, 4096, RELU);
+ connected_layer nl3 = make_connected_layer(4096, 1000, RELU);
net.layers[0] = &cl;
net.layers[1] = &ml;
@@ -155,6 +155,7 @@
show_image_layers(get_network_image(net), "Test Network Layer");
}
+
void test_backpropagate()
{
int n = 3;
@@ -169,13 +170,13 @@
int i;
clock_t start = clock(), end;
for(i = 0; i < 100; ++i){
- backpropagate_layer(dog_copy, cl);
+ backpropagate_convolutional_layer(dog_copy, cl);
}
end = clock();
printf("Backpropagate: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
start = clock();
for(i = 0; i < 100; ++i){
- backpropagate_layer_convolve(dog, cl);
+ backpropagate_convolutional_layer_convolve(dog, cl);
}
end = clock();
printf("Backpropagate Using Convolutions: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
@@ -185,14 +186,54 @@
show_image(dog, "Test Backpropagate Difference");
}
+void test_ann()
+{
+ network net;
+ net.n = 3;
+ net.layers = calloc(net.n, sizeof(void *));
+ net.types = calloc(net.n, sizeof(LAYER_TYPE));
+ net.types[0] = CONNECTED;
+ net.types[1] = CONNECTED;
+ net.types[2] = CONNECTED;
+
+ connected_layer nl = make_connected_layer(1, 20, RELU);
+ connected_layer nl2 = make_connected_layer(20, 20, RELU);
+ connected_layer nl3 = make_connected_layer(20, 1, RELU);
+
+ net.layers[0] = &nl;
+ net.layers[1] = &nl2;
+ net.layers[2] = &nl3;
+
+ image t = make_image(1,1,1);
+ int count = 0;
+
+ double avgerr = 0;
+ while(1){
+ double v = ((double)rand()/RAND_MAX);
+ double truth = v*v;
+ set_pixel(t,0,0,0,v);
+ run_network(t, net);
+ double *out = get_network_output(net);
+ double err = pow((out[0]-truth),2.);
+ avgerr = .99 * avgerr + .01 * err;
+ //if(++count % 100000 == 0) printf("%f\n", avgerr);
+ if(++count % 100000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
+ out[0] = truth - out[0];
+ learn_network(t, net);
+ update_network(net, .001);
+ }
+
+}
+
int main()
{
//test_backpropagate();
+ test_ann();
//test_convolve();
//test_upsample();
//test_rotate();
//test_load();
- test_network();
+ //test_network();
//test_convolutional_layer();
//test_color();
cvWaitKey(0);
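test_ann fits f(v) = v*v on [0, 1] with a 1-20-20-1 ReLU network, and the `out[0] = truth - out[0]` line is what makes the additive updates a descent step on the squared error:

    E        = (1/2)*(truth - out)^2
    dE/dout  = -(truth - out)
    delta    = truth - out           (stored into out[0])
    w_update = delta * input         (accumulated by calculate_update_connected_layer)

so adding step times the accumulated updates in update_connected_layer moves the weights against the gradient of E. One quirk: the output layer is RELU, which can only produce non-negative values; that happens to match the target here, since v*v >= 0.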
--
Gitblit v1.10.0