From 153705226d8ca746478b69eeac9bc854766daa11 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Tue, 27 Jan 2015 21:31:06 +0000
Subject: [PATCH] Bias updates bug fix
---
src/network.c | 3 -
src/utils.h | 1
Makefile | 5 +-
src/network_kernels.cu | 9 ++++
src/connected_layer.c | 2 -
src/data.c | 3 +
src/cnn.c | 33 +++++++++-------
src/convolutional_kernels.cu | 18 ++++++---
src/utils.c | 8 ++++
9 files changed, 52 insertions(+), 30 deletions(-)
diff --git a/Makefile b/Makefile
index e48e142..cc0c9ad 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,6 @@
GPU=1
DEBUG=0
+ARCH= -arch=sm_35
VPATH=./src/
EXEC=cnn
@@ -8,7 +9,6 @@
CC=gcc
NVCC=nvcc
OPTS=-O3
-LINKER=$(CC)
LDFLAGS=`pkg-config --libs opencv` -lm -pthread
COMMON=`pkg-config --cflags opencv` -I/usr/local/cuda/include/
CFLAGS=-Wall -Wfatal-errors
@@ -20,7 +20,6 @@
endif
ifeq ($(GPU), 1)
-LINKER=$(NVCC)
COMMON+=-DGPU
CFLAGS+=-DGPU
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas
@@ -43,7 +42,7 @@
$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
$(OBJDIR)%.o: %.cu
- $(NVCC) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
+ $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
.PHONY: clean
diff --git a/src/cnn.c b/src/cnn.c
index c3b7b2c..4f575dc 100644
--- a/src/cnn.c
+++ b/src/cnn.c
@@ -212,7 +212,8 @@
//network net = parse_network_cfg("/home/pjreddie/imagenet_backup/alexnet_1270.cfg");
srand(time(0));
network net = parse_network_cfg(cfgfile);
- set_learning_network(&net, net.learning_rate, net.momentum, net.decay);
+ //test_learn_bias(*(convolutional_layer *)net.layers[1]);
+ //set_learning_network(&net, net.learning_rate, 0, net.decay);
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
int imgs = 3072;
int i = net.seen/imgs;
@@ -383,25 +384,26 @@
cvWaitKey(0);
}
-void test_cifar10()
+void test_cifar10(char *cfgfile)
{
- network net = parse_network_cfg("cfg/cifar10_part5.cfg");
+ network net = parse_network_cfg(cfgfile);
data test = load_cifar10_data("data/cifar10/test_batch.bin");
clock_t start = clock(), end;
- float test_acc = network_accuracy(net, test);
+ float test_acc = network_accuracy_multi(net, test, 10);
end = clock();
- printf("%f in %f Sec\n", test_acc, (float)(end-start)/CLOCKS_PER_SEC);
- visualize_network(net);
- cvWaitKey(0);
+ printf("%f in %f Sec\n", test_acc, sec(end-start));
+ //visualize_network(net);
+ //cvWaitKey(0);
}
-void train_cifar10()
+void train_cifar10(char *cfgfile)
{
srand(555555);
- network net = parse_network_cfg("cfg/cifar10.cfg");
+ srand(time(0));
+ network net = parse_network_cfg(cfgfile);
data test = load_cifar10_data("data/cifar10/test_batch.bin");
int count = 0;
- int iters = 10000/net.batch;
+ int iters = 50000/net.batch;
data train = load_all_cifar10();
while(++count <= 10000){
clock_t time = clock();
@@ -410,9 +412,9 @@
if(count%10 == 0){
float test_acc = network_accuracy(net, test);
printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds\n", count, loss, test_acc,sec(clock()-time));
- //char buff[256];
- //sprintf(buff, "unikitty/cifar10_%d.cfg", count);
- //save_network(net, buff);
+ char buff[256];
+ sprintf(buff, "/home/pjreddie/imagenet_backup/cifar10_%d.cfg", count);
+ save_network(net, buff);
}else{
printf("%d: Loss: %f, Time: %lf seconds\n", count, loss, sec(clock()-time));
}
@@ -709,8 +711,7 @@
}
#endif
- if(0==strcmp(argv[1], "cifar")) train_cifar10();
- else if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet();
+ if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet();
else if(0==strcmp(argv[1], "test_correct_nist")) test_correct_nist();
else if(0==strcmp(argv[1], "test")) test_imagenet();
//else if(0==strcmp(argv[1], "server")) run_server();
@@ -724,7 +725,9 @@
return 0;
}
else if(0==strcmp(argv[1], "detection")) train_detection_net(argv[2]);
+ else if(0==strcmp(argv[1], "ctrain")) train_cifar10(argv[2]);
else if(0==strcmp(argv[1], "nist")) train_nist(argv[2]);
+ else if(0==strcmp(argv[1], "ctest")) test_cifar10(argv[2]);
else if(0==strcmp(argv[1], "train")) train_imagenet(argv[2]);
//else if(0==strcmp(argv[1], "client")) train_imagenet_distributed(argv[2]);
else if(0==strcmp(argv[1], "detect")) test_detection(argv[2]);
diff --git a/src/connected_layer.c b/src/connected_layer.c
index 254d39e..514dff0 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -78,8 +78,6 @@
axpy_cpu(layer->outputs, 1, layer->bias_updates, 1, layer->bias_prev, 1);
scal_cpu(layer->outputs, 0, layer->bias_updates, 1);
- //printf("rate: %f\n", layer->learning_rate);
-
axpy_cpu(layer->outputs, layer->learning_rate, layer->bias_prev, 1, layer->biases, 1);
axpy_cpu(layer->inputs*layer->outputs, -layer->decay, layer->weights, 1, layer->weight_prev, 1);
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 6461aff..eaa4161 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -32,7 +32,7 @@
{
__shared__ float part[BLOCK];
int i,b;
- int filter = (blockIdx.x + blockIdx.y*gridDim.x);
+ int filter = blockIdx.x;
int p = threadIdx.x;
float sum = 0;
for(b = 0; b < batch; ++b){
@@ -52,8 +52,7 @@
{
int size = convolutional_out_height(layer)*convolutional_out_width(layer);
-
- learn_bias<<<cuda_gridsize(layer.n), BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu);
+ learn_bias<<<layer.n, BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu);
check_error(cudaPeekAtLastError());
}
@@ -96,9 +95,6 @@
gemm_ongpu(0,0,m,n,k,1.,a,k,b,n,1.,c+i*m*n,n);
}
activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation);
- cuda_pull_array(layer.output_gpu, layer.output, m*n*layer.batch);
- //for(i = 0; i < m*n*layer.batch; ++i) printf("%f, ", layer.output[i]);
- //printf("\n");
}
extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu)
@@ -153,6 +149,16 @@
extern "C" void update_convolutional_layer_gpu(convolutional_layer layer)
{
int size = layer.size*layer.size*layer.c*layer.n;
+
+/*
+ cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
+ cuda_pull_array(layer.biases_gpu, layer.biases, layer.n);
+ cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, size);
+ cuda_pull_array(layer.filters_gpu, layer.filters, size);
+ printf("Bias: %f updates: %f\n", mse_array(layer.biases, layer.n), mse_array(layer.bias_updates, layer.n));
+ printf("Filter: %f updates: %f\n", mse_array(layer.filters, layer.n), mse_array(layer.filter_updates, layer.n));
+ */
+
axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1);
diff --git a/src/data.c b/src/data.c
index 31aca3b..87097b6 100644
--- a/src/data.c
+++ b/src/data.c
@@ -239,7 +239,8 @@
{
struct load_args a = *(struct load_args*)ptr;
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.k, a.h, a.w);
- normalize_data_rows(*a.d);
+ translate_data_rows(*a.d, -144);
+ scale_data_rows(*a.d, 1./128);
free(ptr);
return 0;
}
diff --git a/src/network.c b/src/network.c
index eb39054..f554090 100644
--- a/src/network.c
+++ b/src/network.c
@@ -42,8 +42,6 @@
return "none";
}
-
-
network make_network(int n, int batch)
{
network net;
@@ -61,7 +59,6 @@
return net;
}
-
void forward_network(network net, float *input, float *truth, int train)
{
int i;
diff --git a/src/network_kernels.cu b/src/network_kernels.cu
index a009174..7909e46 100644
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@@ -176,6 +176,7 @@
float train_network_datum_gpu(network net, float *x, float *y)
{
+ //clock_t time = clock();
int x_size = get_network_input_size(net)*net.batch;
int y_size = get_network_output_size(net)*net.batch;
if(!*net.input_gpu){
@@ -185,10 +186,18 @@
cuda_push_array(*net.input_gpu, x, x_size);
cuda_push_array(*net.truth_gpu, y, y_size);
}
+ //printf("trans %f\n", sec(clock() - time));
+ //time = clock();
forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1);
+ //printf("forw %f\n", sec(clock() - time));
+ //time = clock();
backward_network_gpu(net, *net.input_gpu);
+ //printf("back %f\n", sec(clock() - time));
+ //time = clock();
update_network_gpu(net);
float error = get_network_cost(net);
+ //printf("updt %f\n", sec(clock() - time));
+ //time = clock();
return error;
}
diff --git a/src/utils.c b/src/utils.c
index a4071e2..96062b0 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -233,6 +233,14 @@
return a;
}
+float mse_array(float *a, int n) // returns sqrt of the mean of a[i]*a[i] over a[0..n-1] (a root-mean-square, despite the "mse" name)
+{
+ int i;
+ float sum = 0; // running sum of the squared elements
+ for(i = 0; i < n; ++i) sum += a[i]*a[i];
+ return sqrt(sum/n); // n is not checked: with n == 0 this divides 0 by 0 (presumably yields NaN -- confirm callers never pass 0)
+}
+
void normalize_array(float *a, int n)
{
int i;
diff --git a/src/utils.h b/src/utils.h
index ee26d35..b1a0587 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -22,6 +22,7 @@
void translate_array(float *a, int n, float s);
int max_index(float *a, int n);
float constrain(float a, float max);
+float mse_array(float *a, int n);
float rand_normal();
float rand_uniform();
float sum_array(float *a, int n);
--
Gitblit v1.10.0