From 176d65b76583803cf10194c4c70bdc51897f2ae3 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Mon, 11 Aug 2014 19:52:07 +0000
Subject: [PATCH] Nist NIN testing multi-crop
---
src/network.c | 60 +++++++++
src/convolutional_layer_gpu.c | 0
src/crop_layer.c | 57 +++++++++
src/utils.h | 1
src/network.h | 4
Makefile | 2
src/convolutional_layer.c | 2
src/crop_layer.h | 22 +++
src/parser.c | 116 ++++++++++++++----
src/cnn.c | 66 ++++++++--
src/utils.c | 9 +
11 files changed, 288 insertions(+), 51 deletions(-)
diff --git a/Makefile b/Makefile
index 877fc7f..cf0cfdf 100644
--- a/Makefile
+++ b/Makefile
@@ -25,7 +25,7 @@
EXEC=cnn
OBJDIR=./obj/
-OBJ=network.o image.o cnn.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o gemm.o normalization_layer.o opencl.o im2col.o col2im.o axpy.o dropout_layer.o
+OBJ=network.o image.o cnn.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o gemm.o normalization_layer.o opencl.o im2col.o col2im.o axpy.o dropout_layer.o crop_layer.o
OBJS = $(addprefix $(OBJDIR), $(OBJ))
all: $(EXEC)
diff --git a/src/cnn.c b/src/cnn.c
index 41a7808..72ad4a1 100644
--- a/src/cnn.c
+++ b/src/cnn.c
@@ -240,9 +240,22 @@
void test_cifar10()
{
- srand(222222);
+ /* Score the cfg/cifar10_part5.cfg network on the CIFAR-10 test batch, print accuracy and elapsed time, then display the network. */
+ network net = parse_network_cfg("cfg/cifar10_part5.cfg");
+ data test = load_cifar10_data("data/cifar10/test_batch.bin"); /* NOTE(review): never passed to free_data() in this function */
+ clock_t start = clock(), end; /* clock() reports processor time, not wall-clock time */
+ float test_acc = network_accuracy(net, test);
+ end = clock();
+ printf("%f in %f Sec\n", test_acc, (float)(end-start)/CLOCKS_PER_SEC);
+ visualize_network(net);
+ cvWaitKey(0); /* presumably blocks until a key press so the windows stay visible — confirm against the OpenCV docs */
+}
+
+void train_cifar10()
+{
+ srand(555555);
network net = parse_network_cfg("cfg/cifar10.cfg");
- //data test = load_cifar10_data("data/cifar10/test_batch.bin");
+ data test = load_cifar10_data("data/cifar10/test_batch.bin");
int count = 0;
int iters = 10000/net.batch;
data train = load_all_cifar10();
@@ -250,12 +263,20 @@
clock_t start = clock(), end;
float loss = train_network_sgd(net, train, iters);
end = clock();
- //visualize_network(net);
- //cvWaitKey(1000);
+ visualize_network(net);
+ cvWaitKey(5000);
//float test_acc = network_accuracy(net, test);
//printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
- printf("%d: Loss: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, (float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
+ if(count%10 == 0){
+ float test_acc = network_accuracy(net, test);
+ printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
+ char buff[256];
+ sprintf(buff, "/home/pjreddie/cifar/cifar2_%d.cfg", count);
+ save_network(net, buff);
+ }else{
+ printf("%d: Loss: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, (float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
+ }
}
free_data(train);
}
@@ -292,13 +313,25 @@
void test_nist()
{
srand(222222);
- network net = parse_network_cfg("cfg/nist.cfg");
+ network net = parse_network_cfg("cfg/nist_final.cfg"); /* evaluation only: training now lives in train_nist() */
+ data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10);
+ translate_data_rows(test, -144); /* same -144 shift that train_nist() applies to its train and test data */
+ clock_t start = clock(), end;
+ float test_acc = network_accuracy_multi(net, test,16); /* 16 forward passes per batch, predictions averaged before scoring */
+ end = clock();
+ printf("Accuracy: %f, Time: %lf seconds\n", test_acc,(float)(end-start)/CLOCKS_PER_SEC);
+}
+
+void train_nist()
+{
+ srand(222222);
+ network net = parse_network_cfg("cfg/nist_final.cfg");
data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10);
- translate_data_rows(train, -144);
- //scale_data_rows(train, 1./128);
- translate_data_rows(test, -144);
- //scale_data_rows(test, 1./128);
+ translate_data_rows(train, -144);
+ //scale_data_rows(train, 1./128);
+ translate_data_rows(test, -144);
+ //scale_data_rows(test, 1./128);
//randomize_data(train);
int count = 0;
//clock_t start = clock(), end;
@@ -311,12 +344,12 @@
//float test_acc = 0;
printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay);
/*printf("%f %f %f %f %f\n", mean_array(get_network_output_layer(net,0), 100),
- mean_array(get_network_output_layer(net,1), 100),
- mean_array(get_network_output_layer(net,2), 100),
- mean_array(get_network_output_layer(net,3), 100),
- mean_array(get_network_output_layer(net,4), 100));
- */
- //save_network(net, "cfg/nist_basic_trained.cfg");
+ mean_array(get_network_output_layer(net,1), 100),
+ mean_array(get_network_output_layer(net,2), 100),
+ mean_array(get_network_output_layer(net,3), 100),
+ mean_array(get_network_output_layer(net,4), 100));
+ */
+ save_network(net, "cfg/nist_final2.cfg");
//printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay);
//end = clock();
@@ -778,6 +811,7 @@
//test_nist_single();
test_nist();
//test_cifar10();
+ //train_cifar10();
//test_vince();
//test_full();
//tune_VOC();
diff --git a/src/convolutional_layer.c b/src/convolutional_layer.c
index afa91d4..2d4d748 100644
--- a/src/convolutional_layer.c
+++ b/src/convolutional_layer.c
@@ -166,7 +166,7 @@
*convolutional_out_width(layer);
for(b = 0; b < layer.batch; ++b){
for(i = 0; i < layer.n; ++i){
- layer.bias_updates[i] += mean_array(layer.delta+size*(i+b*layer.n), size);
+ layer.bias_updates[i] += sum_array(layer.delta+size*(i+b*layer.n), size);
}
}
}
diff --git a/src/convolutional_layer_gpu.c b/src/convolutional_layer_gpu.c
deleted file mode 100644
index e69de29..0000000
--- a/src/convolutional_layer_gpu.c
+++ /dev/null
diff --git a/src/crop_layer.c b/src/crop_layer.c
new file mode 100644
index 0000000..58e1b55
--- /dev/null
+++ b/src/crop_layer.c
@@ -0,0 +1,57 @@
+#include "crop_layer.h"
+#include <stdio.h>
+/* View layer.output as a crop_height x crop_width x c image via float_to_image(); the dimensions describe a single image, not the whole batch. */
+image get_crop_image(crop_layer layer)
+{
+ int h = layer.crop_height;
+ int w = layer.crop_width;
+ int c = layer.c;
+ return float_to_image(h,w,c,layer.output);
+}
+/* Allocate a crop layer for batch images of h x w x c that emits crop_height x crop_width x c crops; flip != 0 enables random mirroring in the forward pass. */
+crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip)
+{
+ fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
+ crop_layer *layer = calloc(1, sizeof(crop_layer)); /* NOTE(review): calloc results are used unchecked throughout this function */
+ layer->batch = batch;
+ layer->h = h;
+ layer->w = w;
+ layer->c = c;
+ layer->flip = flip;
+ layer->crop_width = crop_width;
+ layer->crop_height = crop_height;
+ layer->output = calloc(crop_width*crop_height * c*batch, sizeof(float)); /* one cropped image per batch entry */
+ layer->delta = calloc(crop_width*crop_height * c*batch, sizeof(float)); /* same size as output; not written anywhere in this file */
+ return layer;
+}
+void forward_crop_layer(const crop_layer layer, float *input) /* copy one randomly placed, optionally mirrored, window of every input image into layer.output */
+{
+ int i,j,c,b;
+ int dh = rand()%(layer.h - layer.crop_height + 1); /* +1: offsets 0..h-crop_height are all valid, and crop == input no longer divides by zero */
+ int dw = rand()%(layer.w - layer.crop_width + 1); /* one (dh, dw) pair and one flip decision are drawn per call and shared by the whole batch */
+ int count = 0; /* running write position in layer.output */
+ if(layer.flip && rand()%2){ /* mirrored copy: each row is read right-to-left */
+ for(b = 0; b < layer.batch; ++b){
+ for(c = 0; c < layer.c; ++c){
+ for(i = dh; i < dh+layer.crop_height; ++i){
+ for(j = dw+layer.crop_width-1; j >= dw; --j){
+ int index = j+layer.w*(i+layer.h*(c + layer.c*b)); /* input is indexed batch, channel, row, column */
+ layer.output[count++] = input[index];
+ }
+ }
+ }
+ }
+ }else{ /* plain copy: each row is read left-to-right */
+ for(b = 0; b < layer.batch; ++b){
+ for(c = 0; c < layer.c; ++c){
+ for(i = dh; i < dh+layer.crop_height; ++i){
+ for(j = dw; j < dw+layer.crop_width; ++j){
+ int index = j+layer.w*(i+layer.h*(c + layer.c*b));
+ layer.output[count++] = input[index];
+ }
+ }
+ }
+ }
+ }
+}
+
diff --git a/src/crop_layer.h b/src/crop_layer.h
new file mode 100644
index 0000000..a0cd939
--- /dev/null
+++ b/src/crop_layer.h
@@ -0,0 +1,22 @@
+#ifndef CROP_LAYER_H
+#define CROP_LAYER_H
+
+#include "image.h"
+/* Layer that copies a randomly placed crop_height x crop_width window out of each h x w x c input image. */
+typedef struct {
+ int batch; /* images per forward pass */
+ int h,w,c; /* input height, width and channel count */
+ int crop_width; /* output width */
+ int crop_height; /* output height */
+ int flip; /* non-zero: the forward pass mirrors the crop left-to-right on roughly half of the calls */
+ float *delta; /* crop_width*crop_height*c*batch floats, allocated by make_crop_layer() */
+ float *output; /* crop_width*crop_height*c*batch floats holding the cropped batch */
+} crop_layer;
+
+image get_crop_image(crop_layer layer);
+crop_layer *make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip);
+void forward_crop_layer(const crop_layer layer, float *input);
+void backward_crop_layer(const crop_layer layer, float *input, float *delta); /* NOTE(review): declared here, but crop_layer.c in this patch provides no definition */
+
+#endif
+
diff --git a/src/network.c b/src/network.c
index ed927a8..292bba0 100644
--- a/src/network.c
+++ b/src/network.c
@@ -4,6 +4,7 @@
#include "data.h"
#include "utils.h"
+#include "crop_layer.h"
#include "connected_layer.h"
#include "convolutional_layer.h"
#include "maxpool_layer.h"
@@ -56,6 +57,11 @@
forward_softmax_layer(layer, input);
input = layer.output;
}
+ else if(net.types[i] == CROP){
+ crop_layer layer = *(crop_layer *)net.layers[i];
+ forward_crop_layer(layer, input);
+ input = layer.output;
+ }
else if(net.types[i] == MAXPOOL){
maxpool_layer layer = *(maxpool_layer *)net.layers[i];
forward_maxpool_layer(layer, input);
@@ -85,6 +91,11 @@
forward_connected_layer(layer, input);
input = layer.output;
}
+ else if(net.types[i] == CROP){
+ crop_layer layer = *(crop_layer *)net.layers[i];
+ forward_crop_layer(layer, input);
+ input = layer.output;
+ }
else if(net.types[i] == SOFTMAX){
softmax_layer layer = *(softmax_layer *)net.layers[i];
forward_softmax_layer(layer, input);
@@ -266,12 +277,14 @@
int i,j;
float sum = 0;
+ int index = 0;
for(i = 0; i < n; ++i){
for(j = 0; j < batch; ++j){
- int index = rand()%d.X.rows;
+ index = rand()%d.X.rows;
memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float));
memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float));
}
+
float err = train_network_datum(net, X, y);
sum += err;
//train_network_datum(net, X, y);
@@ -300,6 +313,7 @@
//}
}
//printf("Accuracy: %f\n",(float) correct/n);
+ //show_image(float_to_image(32,32,3,X), "Orig");
free(X);
free(y);
return (float)sum/(n*batch);
@@ -446,6 +460,10 @@
normalization_layer layer = *(normalization_layer *)net.layers[i];
return get_normalization_image(layer);
}
+ else if(net.types[i] == CROP){
+ crop_layer layer = *(crop_layer *)net.layers[i];
+ return get_crop_image(layer);
+ }
return make_empty_image(0,0,0);
}
@@ -464,6 +482,7 @@
image *prev = 0;
int i;
char buff[256];
+ show_image(get_network_image_layer(net, 0), "Crop");
for(i = 0; i < net.n; ++i){
sprintf(buff, "Layer %d", i);
if(net.types[i] == CONVOLUTIONAL){
@@ -484,6 +503,31 @@
return out;
}
+matrix network_predict_data_multi(network net, data test, int n) /* predict every row of test, averaging n forward passes per batch; returns a test.X.rows x output-size matrix */
+{
+ int i,j,b,m;
+ int k = get_network_output_size(net);
+ matrix pred = make_matrix(test.X.rows, k); /* accumulated with += below, so it is assumed to start zeroed — confirm against make_matrix */
+ float *X = calloc(net.batch*test.X.cols, sizeof(float)); /* one batch of inputs, cols floats per row (was sized by rows, too small whenever rows < cols) */
+ for(i = 0; i < test.X.rows; i += net.batch){
+ for(b = 0; b < net.batch; ++b){
+ if(i+b == test.X.rows) break; /* last batch may be partial; the remaining slots keep their previous contents */
+ memcpy(X+b*test.X.cols, test.X.vals[i+b], test.X.cols*sizeof(float));
+ }
+ for(m = 0; m < n; ++m){ /* n passes over the same batch; each pass contributes 1/n of its output */
+ float *out = network_predict(net, X);
+ for(b = 0; b < net.batch; ++b){
+ if(i+b == test.X.rows) break; /* ignore outputs of the unused slots in a partial batch */
+ for(j = 0; j < k; ++j){
+ pred.vals[i+b][j] += out[j+b*k]/n;
+ }
+ }
+ }
+ }
+ free(X);
+ return pred;
+}
+
matrix network_predict_data(network net, data test)
{
int i,j,b;
@@ -525,6 +569,12 @@
image m = get_maxpool_image(layer);
n = m.h*m.w*m.c;
}
+ else if(net.types[i] == CROP){
+ crop_layer layer = *(crop_layer *)net.layers[i];
+ output = layer.output;
+ image m = get_crop_image(layer);
+ n = m.h*m.w*m.c;
+ }
else if(net.types[i] == CONNECTED){
connected_layer layer = *(connected_layer *)net.layers[i];
output = layer.output;
@@ -553,4 +603,12 @@
return acc;
}
+float network_accuracy_multi(network net, data d, int n) /* accuracy of net on d when each prediction is the average of n forward passes */
+{
+ matrix guess = network_predict_data_multi(net, d, n);
+ float acc = matrix_accuracy(d.y, guess); /* compare the averaged predictions against the labels in d.y */
+ free_matrix(guess);
+ return acc;
+}
+
diff --git a/src/network.h b/src/network.h
index a9a6797..f8666e6 100644
--- a/src/network.h
+++ b/src/network.h
@@ -12,7 +12,8 @@
MAXPOOL,
SOFTMAX,
NORMALIZATION,
- DROPOUT
+ DROPOUT,
+ CROP
} LAYER_TYPE;
typedef struct {
@@ -41,6 +42,7 @@
void train_network(network net, data d);
matrix network_predict_data(network net, data test);
float network_accuracy(network net, data d);
+float network_accuracy_multi(network net, data d, int n);
float *get_network_output(network net);
float *get_network_output_layer(network net, int i);
float *get_network_delta_layer(network net, int i);
diff --git a/src/parser.c b/src/parser.c
index 1656346..5c991a5 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -4,6 +4,7 @@
#include "parser.h"
#include "activations.h"
+#include "crop_layer.h"
#include "convolutional_layer.h"
#include "connected_layer.h"
#include "maxpool_layer.h"
@@ -24,6 +25,7 @@
int is_maxpool(section *s);
int is_dropout(section *s);
int is_softmax(section *s);
+int is_crop(section *s);
int is_normalization(section *s);
list *read_cfg(char *filename);
@@ -43,6 +45,22 @@
free(s);
}
+void parse_data(char *data, float *a, int n) /* read up to n comma-separated floats from data into a; data is modified in place (separators become '\0') */
+{
+ int i;
+ if(!data || !*data) return; /* NULL or empty: nothing to parse; the pre-incrementing scan below would step past an empty string's terminator */
+ char *curr = data;
+ char *next = data;
+ int done = 0;
+ for(i = 0; i < n && !done; ++i){
+ while(*++next !='\0' && *next != ','); /* advance to the next ',' or to the end of the string */
+ if(*next == '\0') done = 1;
+ *next = '\0'; /* terminate the current field so sscanf stops there */
+ sscanf(curr, "%g", &a[i]); /* on a failed conversion a[i] keeps its previous value */
+ curr = next+1;
+ }
+}
+
convolutional_layer *parse_convolutional(list *options, network *net, int count)
{
int i;
@@ -95,30 +113,8 @@
}
char *weights = option_find_str(options, "weights", 0);
char *biases = option_find_str(options, "biases", 0);
- if(biases){
- char *curr = biases;
- char *next = biases;
- int done = 0;
- for(i = 0; i < n && !done; ++i){
- while(*++next !='\0' && *next != ',');
- if(*next == '\0') done = 1;
- *next = '\0';
- sscanf(curr, "%g", &layer->biases[i]);
- curr = next+1;
- }
- }
- if(weights){
- char *curr = weights;
- char *next = weights;
- int done = 0;
- for(i = 0; i < c*n*size*size && !done; ++i){
- while(*++next !='\0' && *next != ',');
- if(*next == '\0') done = 1;
- *next = '\0';
- sscanf(curr, "%g", &layer->filters[i]);
- curr = next+1;
- }
- }
+ parse_data(biases, layer->biases, n);
+ parse_data(weights, layer->filters, c*n*size*size);
option_unused(options);
return layer;
}
@@ -164,6 +160,10 @@
curr = next+1;
}
}
+ char *weights = option_find_str(options, "weights", 0);
+ char *biases = option_find_str(options, "biases", 0);
+ parse_data(biases, layer->biases, output);
+ parse_data(weights, layer->weights, input*output);
option_unused(options);
return layer;
}
@@ -182,6 +182,36 @@
return layer;
}
+crop_layer *parse_crop(list *options, network *net, int count) /* build a crop layer from a [crop] cfg section; count is the layer's index in the network */
+{
+ float learning_rate, momentum, decay;
+ int h,w,c;
+ int crop_height = option_find_int(options, "crop_height",1);
+ int crop_width = option_find_int(options, "crop_width",1);
+ int flip = option_find_int(options, "flip",0);
+ if(count == 0){ /* first layer: input size, batch and training hyper-parameters come from this section */
+ h = option_find_int(options, "height",1);
+ w = option_find_int(options, "width",1);
+ c = option_find_int(options, "channels",1);
+ net->batch = option_find_int(options, "batch",1);
+ learning_rate = option_find_float(options, "learning_rate", .001);
+ momentum = option_find_float(options, "momentum", .9);
+ decay = option_find_float(options, "decay", .0001);
+ net->learning_rate = learning_rate;
+ net->momentum = momentum;
+ net->decay = decay;
+ }else{ /* later layer: input size is the image produced by the previous layer */
+ image m = get_network_image_layer(*net, count-1);
+ h = m.h;
+ w = m.w;
+ c = m.c;
+ if(h == 0) error("Layer before crop layer must output image.");
+ }
+ crop_layer *layer = make_crop_layer(net->batch,h,w,c,crop_height,crop_width,flip); /* NOTE(review): crop_height/crop_width are not validated against h/w here */
+ option_unused(options);
+ return layer;
+}
+
maxpool_layer *parse_maxpool(list *options, network *net, int count)
{
int h,w,c;
@@ -261,6 +291,10 @@
connected_layer *layer = parse_connected(options, &net, count);
net.types[count] = CONNECTED;
net.layers[count] = layer;
+ }else if(is_crop(s)){
+ crop_layer *layer = parse_crop(options, &net, count);
+ net.types[count] = CROP;
+ net.layers[count] = layer;
}else if(is_softmax(s)){
softmax_layer *layer = parse_softmax(options, &net, count);
net.types[count] = SOFTMAX;
@@ -290,6 +324,10 @@
return net;
}
+int is_crop(section *s) /* non-zero when the section header is exactly "[crop]" */
+{
+ return (strcmp(s->type, "[crop]")==0);
+}
int is_convolutional(section *s)
{
return (strcmp(s->type, "[conv]")==0
@@ -389,11 +427,11 @@
l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay);
} else {
if(l->learning_rate != net.learning_rate)
- fprintf(fp, "learning_rate=%g\n", l->learning_rate);
+ fprintf(fp, "learning_rate=%g\n", l->learning_rate);
if(l->momentum != net.momentum)
- fprintf(fp, "momentum=%g\n", l->momentum);
+ fprintf(fp, "momentum=%g\n", l->momentum);
if(l->decay != net.decay)
- fprintf(fp, "decay=%g\n", l->decay);
+ fprintf(fp, "decay=%g\n", l->decay);
}
fprintf(fp, "filters=%d\n"
"size=%d\n"
@@ -432,12 +470,30 @@
"activation=%s\n",
l->outputs,
get_activation_string(l->activation));
- fprintf(fp, "data=");
+ fprintf(fp, "biases=");
for(i = 0; i < l->outputs; ++i) fprintf(fp, "%g,", l->biases[i]);
- for(i = 0; i < l->inputs*l->outputs; ++i) fprintf(fp, "%g,", l->weights[i]);
+ fprintf(fp, "\n");
+ fprintf(fp, "weights=");
+ for(i = 0; i < l->outputs*l->inputs; ++i) fprintf(fp, "%g,", l->weights[i]);
fprintf(fp, "\n\n");
}
+void print_crop_cfg(FILE *fp, crop_layer *l, network net, int count) /* write layer l to fp as a [crop] section using the keys parse_crop() reads */
+{
+ fprintf(fp, "[crop]\n");
+ if(count == 0) { /* only the first layer carries the input size, batch and network-wide training settings */
+ fprintf(fp, "batch=%d\n"
+ "height=%d\n"
+ "width=%d\n"
+ "channels=%d\n"
+ "learning_rate=%g\n"
+ "momentum=%g\n"
+ "decay=%g\n",
+ l->batch,l->h, l->w, l->c, net.learning_rate, net.momentum, net.decay);
+ }
+ fprintf(fp, "crop_height=%d\ncrop_width=%d\nflip=%d\n\n", l->crop_height, l->crop_width, l->flip); /* trailing blank line separates sections */
+}
+
void print_maxpool_cfg(FILE *fp, maxpool_layer *l, network net, int count)
{
fprintf(fp, "[maxpool]\n");
@@ -481,6 +537,8 @@
print_convolutional_cfg(fp, (convolutional_layer *)net.layers[i], net, i);
else if(net.types[i] == CONNECTED)
print_connected_cfg(fp, (connected_layer *)net.layers[i], net, i);
+ else if(net.types[i] == CROP)
+ print_crop_cfg(fp, (crop_layer *)net.layers[i], net, i);
else if(net.types[i] == MAXPOOL)
print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i], net, i);
else if(net.types[i] == NORMALIZATION)
diff --git a/src/utils.c b/src/utils.c
index 67a9ba1..8a65ba7 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -143,12 +143,17 @@
return field;
}
-float mean_array(float *a, int n)
+float sum_array(float *a, int n) /* sum of the first n elements of a, accumulated in a float */
{
int i;
float sum = 0;
for(i = 0; i < n; ++i) sum += a[i];
- return sum/n;
+ return sum;
+}
+/* Arithmetic mean of the first n elements of a; n is not checked for zero. */
+float mean_array(float *a, int n)
+{
+ return sum_array(a,n)/n;
}
float variance_array(float *a, int n)
diff --git a/src/utils.h b/src/utils.h
index 6fe0343..f38af33 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -21,6 +21,7 @@
float constrain(float a, float max);
float rand_normal();
float rand_uniform();
+float sum_array(float *a, int n);
float mean_array(float *a, int n);
float variance_array(float *a, int n);
float **one_hot_encode(float *a, int n, int k);
--
Gitblit v1.10.0