From 153705226d8ca746478b69eeac9bc854766daa11 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Tue, 27 Jan 2015 21:31:06 +0000
Subject: [PATCH] Fix bias updates bug in GPU convolutional layers

The learn_bias kernel indexed filters with a 2-D grid
(blockIdx.x + blockIdx.y*gridDim.x) but was launched over
cuda_gridsize(layer.n); launch it with one block per filter and
index by blockIdx.x alone. Also bundled here: an ARCH flag for
nvcc, removal of the unused LINKER variable, ctrain/ctest commands
that take a cfg path for CIFAR-10, a fixed translate/scale in place
of per-row data normalization, and an mse_array() debug helper.

---
 src/network.c                |    3 -
 src/utils.h                  |    1 
 Makefile                     |    5 +-
 src/network_kernels.cu       |    9 ++++
 src/connected_layer.c        |    2 -
 src/data.c                   |    3 +
 src/cnn.c                    |   33 +++++++++-------
 src/convolutional_kernels.cu |   18 ++++++---
 src/utils.c                  |    8 ++++
 9 files changed, 52 insertions(+), 30 deletions(-)

diff --git a/Makefile b/Makefile
index e48e142..cc0c9ad 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,6 @@
 GPU=1
 DEBUG=0
+ARCH= -arch=sm_35
 
 VPATH=./src/
 EXEC=cnn
@@ -8,7 +9,6 @@
 CC=gcc
 NVCC=nvcc
 OPTS=-O3
-LINKER=$(CC)
 LDFLAGS=`pkg-config --libs opencv` -lm -pthread
 COMMON=`pkg-config --cflags opencv` -I/usr/local/cuda/include/
 CFLAGS=-Wall -Wfatal-errors
@@ -20,7 +20,6 @@
 endif
 
 ifeq ($(GPU), 1) 
-LINKER=$(NVCC)
 COMMON+=-DGPU
 CFLAGS+=-DGPU
 LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas
@@ -43,7 +42,7 @@
 	$(CC) $(COMMON) $(CFLAGS) -c $< -o $@
 
 $(OBJDIR)%.o: %.cu 
-	$(NVCC) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
+	$(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
 
 .PHONY: clean
 
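Note: ARCH pins nvcc code generation to compute capability 3.5
(Kepler-class cards). GNU make lets command-line assignments
override the Makefile, so a hypothetical build for a cc 3.0 card
can pass its own value without editing anything:

    make GPU=1 ARCH='-arch=sm_30'
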
diff --git a/src/cnn.c b/src/cnn.c
index c3b7b2c..4f575dc 100644
--- a/src/cnn.c
+++ b/src/cnn.c
@@ -212,7 +212,8 @@
     //network net = parse_network_cfg("/home/pjreddie/imagenet_backup/alexnet_1270.cfg");
     srand(time(0));
     network net = parse_network_cfg(cfgfile);
-    set_learning_network(&net, net.learning_rate, net.momentum, net.decay);
+    //test_learn_bias(*(convolutional_layer *)net.layers[1]);
+    //set_learning_network(&net, net.learning_rate, 0, net.decay);
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int imgs = 3072;
     int i = net.seen/imgs;
@@ -383,25 +384,26 @@
     cvWaitKey(0);
 }
 
-void test_cifar10()
+void test_cifar10(char *cfgfile)
 {
-    network net = parse_network_cfg("cfg/cifar10_part5.cfg");
+    network net = parse_network_cfg(cfgfile);
     data test = load_cifar10_data("data/cifar10/test_batch.bin");
     clock_t start = clock(), end;
-    float test_acc = network_accuracy(net, test);
+    float test_acc = network_accuracy_multi(net, test, 10);
     end = clock();
-    printf("%f in %f Sec\n", test_acc, (float)(end-start)/CLOCKS_PER_SEC);
-    visualize_network(net);
-    cvWaitKey(0);
+    printf("%f in %f Sec\n", test_acc, sec(end-start));
+    //visualize_network(net);
+    //cvWaitKey(0);
 }
 
-void train_cifar10()
+void train_cifar10(char *cfgfile)
 {
     srand(555555);
-    network net = parse_network_cfg("cfg/cifar10.cfg");
+    srand(time(0));
+    network net = parse_network_cfg(cfgfile);
     data test = load_cifar10_data("data/cifar10/test_batch.bin");
     int count = 0;
-    int iters = 10000/net.batch;
+    int iters = 50000/net.batch;
     data train = load_all_cifar10();
     while(++count <= 10000){
         clock_t time = clock();
@@ -410,9 +412,9 @@
         if(count%10 == 0){
             float test_acc = network_accuracy(net, test);
             printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds\n", count, loss, test_acc,sec(clock()-time));
-            //char buff[256];
-            //sprintf(buff, "unikitty/cifar10_%d.cfg", count);
-            //save_network(net, buff);
+            char buff[256];
+            sprintf(buff, "/home/pjreddie/imagenet_backup/cifar10_%d.cfg", count);
+            save_network(net, buff);
         }else{
             printf("%d: Loss: %f, Time: %lf seconds\n", count, loss, sec(clock()-time));
         }
@@ -709,8 +711,7 @@
     }
 #endif
 
-    if(0==strcmp(argv[1], "cifar")) train_cifar10();
-    else if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet();
+    if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet();
     else if(0==strcmp(argv[1], "test_correct_nist")) test_correct_nist();
     else if(0==strcmp(argv[1], "test")) test_imagenet();
     //else if(0==strcmp(argv[1], "server")) run_server();
@@ -724,7 +725,9 @@
         return 0;
     }
     else if(0==strcmp(argv[1], "detection")) train_detection_net(argv[2]);
+    else if(0==strcmp(argv[1], "ctrain")) train_cifar10(argv[2]);
     else if(0==strcmp(argv[1], "nist")) train_nist(argv[2]);
+    else if(0==strcmp(argv[1], "ctest")) test_cifar10(argv[2]);
     else if(0==strcmp(argv[1], "train")) train_imagenet(argv[2]);
     //else if(0==strcmp(argv[1], "client")) train_imagenet_distributed(argv[2]);
     else if(0==strcmp(argv[1], "detect")) test_detection(argv[2]);
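
Note: the new CIFAR-10 entry points take the cfg path on the command
line (the executable is "cnn" per the Makefile); hypothetical
invocations:

    ./cnn ctrain cfg/cifar10.cfg
    ./cnn ctest /home/pjreddie/imagenet_backup/cifar10_100.cfg
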
diff --git a/src/connected_layer.c b/src/connected_layer.c
index 254d39e..514dff0 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -78,8 +78,6 @@
     axpy_cpu(layer->outputs, 1, layer->bias_updates, 1, layer->bias_prev, 1);
     scal_cpu(layer->outputs, 0, layer->bias_updates, 1);
 
-    //printf("rate:   %f\n", layer->learning_rate);
-
     axpy_cpu(layer->outputs, layer->learning_rate, layer->bias_prev, 1, layer->biases, 1);
 
     axpy_cpu(layer->inputs*layer->outputs, -layer->decay, layer->weights, 1, layer->weight_prev, 1);
diff --git a/src/convolutional_kernels.cu b/src/convolutional_kernels.cu
index 6461aff..eaa4161 100644
--- a/src/convolutional_kernels.cu
+++ b/src/convolutional_kernels.cu
@@ -32,7 +32,7 @@
 {
     __shared__ float part[BLOCK];
     int i,b;
-    int filter = (blockIdx.x + blockIdx.y*gridDim.x);
+    int filter = blockIdx.x;
     int p = threadIdx.x;
     float sum = 0;
     for(b = 0; b < batch; ++b){
@@ -52,8 +52,7 @@
 {
     int size = convolutional_out_height(layer)*convolutional_out_width(layer);
 
-
-    learn_bias<<<cuda_gridsize(layer.n), BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu);
+    learn_bias<<<layer.n, BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu);
     check_error(cudaPeekAtLastError());
 }
 
@@ -96,9 +95,6 @@
         gemm_ongpu(0,0,m,n,k,1.,a,k,b,n,1.,c+i*m*n,n);
     }
     activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation);
-    cuda_pull_array(layer.output_gpu, layer.output, m*n*layer.batch);
-    //for(i = 0; i < m*n*layer.batch; ++i) printf("%f, ", layer.output[i]);
-    //printf("\n");
 }
 
 extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu)
@@ -153,6 +149,16 @@
 extern "C" void update_convolutional_layer_gpu(convolutional_layer layer)
 {
     int size = layer.size*layer.size*layer.c*layer.n;
+
+/*
+    cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n);
+    cuda_pull_array(layer.biases_gpu, layer.biases, layer.n);
+    cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, size);
+    cuda_pull_array(layer.filters_gpu, layer.filters, size);
+    printf("Bias: %f updates: %f\n", mse_array(layer.biases, layer.n), mse_array(layer.bias_updates, layer.n));
+    printf("Filter: %f updates: %f\n", mse_array(layer.filters, size), mse_array(layer.filter_updates, size));
+    */
+
     axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1);
     scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1);
 
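Note: the fix pairs the launch geometry with the kernel's indexing:
one block per filter, with BLOCK threads cooperating on that
filter's sum. A minimal sketch of the pattern (names mirror the
patch and BLOCK is the block-size constant from the surrounding
file; the body is illustrative, not the verbatim kernel):

    __global__ void learn_bias(int batch, int n, int size,
                               float *delta, float *bias_updates)
    {
        __shared__ float part[BLOCK];
        int filter = blockIdx.x;  /* valid because the grid is <<<n, BLOCK>>> */
        int p = threadIdx.x;
        int i, b;
        float sum = 0;
        /* each thread strides across the filter's output map, per image */
        for(b = 0; b < batch; ++b){
            for(i = 0; i < size; i += BLOCK){
                int index = p + i + size*(filter + n*b);
                sum += (p + i < size) ? delta[index] : 0;
            }
        }
        part[p] = sum;
        __syncthreads();
        /* thread 0 folds the per-thread partials into the bias gradient */
        if(p == 0){
            for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i];
        }
    }
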
diff --git a/src/data.c b/src/data.c
index 31aca3b..87097b6 100644
--- a/src/data.c
+++ b/src/data.c
@@ -239,7 +239,8 @@
 {
     struct load_args a = *(struct load_args*)ptr;
     *a.d = load_data(a.paths, a.n, a.m, a.labels, a.k, a.h, a.w);
-    normalize_data_rows(*a.d);
+    translate_data_rows(*a.d, -144);
+    scale_data_rows(*a.d, 1./128);
     free(ptr);
     return 0;
 }
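
Note: the loader now applies a fixed affine transform instead of
per-row normalization. With 8-bit inputs in [0,255],
(x - 144) * (1/128) lands in roughly [-1.125, 0.867]. A standalone
sketch of the same mapping (normalize_fixed is a hypothetical
helper; the patch itself composes translate_data_rows and
scale_data_rows):

    /* map raw pixel values to roughly zero-centered floats */
    void normalize_fixed(float *row, int n)
    {
        int i;
        for(i = 0; i < n; ++i) row[i] = (row[i] - 144.f) / 128.f;
    }
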
diff --git a/src/network.c b/src/network.c
index eb39054..f554090 100644
--- a/src/network.c
+++ b/src/network.c
@@ -42,8 +42,6 @@
     return "none";
 }
 
-
-
 network make_network(int n, int batch)
 {
     network net;
@@ -61,7 +59,6 @@
     return net;
 }
 
-
 void forward_network(network net, float *input, float *truth, int train)
 {
     int i;
diff --git a/src/network_kernels.cu b/src/network_kernels.cu
index a009174..7909e46 100644
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@@ -176,6 +176,7 @@
 
 float train_network_datum_gpu(network net, float *x, float *y)
 {
+    //clock_t time = clock();
     int x_size = get_network_input_size(net)*net.batch;
     int y_size = get_network_output_size(net)*net.batch;
     if(!*net.input_gpu){
@@ -185,10 +186,18 @@
         cuda_push_array(*net.input_gpu, x, x_size);
         cuda_push_array(*net.truth_gpu, y, y_size);
     }
+    //printf("trans %f\n", sec(clock() - time));
+    //time = clock();
     forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1);
+    //printf("forw %f\n", sec(clock() - time));
+    //time = clock();
     backward_network_gpu(net, *net.input_gpu);
+    //printf("back %f\n", sec(clock() - time));
+    //time = clock();
     update_network_gpu(net);
     float error = get_network_cost(net);
+    //printf("updt %f\n", sec(clock() - time));
+    //time = clock();
     return error;
 }
 
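Note: the commented probes time with host-side clock(), but kernel
launches are asynchronous, so the deltas mostly capture launch
overhead. A sketch of a more faithful probe, assuming the standard
CUDA runtime API:

    clock_t time = clock();
    forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1);
    cudaDeviceSynchronize();  /* drain queued work before reading the clock */
    printf("forw %f\n", sec(clock() - time));
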
diff --git a/src/utils.c b/src/utils.c
index a4071e2..96062b0 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -233,6 +233,14 @@
     return a;
 }
 
+float mse_array(float *a, int n)
+{
+    int i;
+    float sum = 0;
+    for(i = 0; i < n; ++i) sum += a[i]*a[i];
+    return sqrt(sum/n);
+}
+
 void normalize_array(float *a, int n)
 {
     int i;
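
Note: despite its name, mse_array() returns sqrt(sum/n) of the
squared entries, i.e. the root-mean-square of the array rather than
a mean squared error; the debug prints above use it as a magnitude
gauge. A quick hypothetical check:

    #include <stdio.h>
    #include "utils.h"

    int main()
    {
        float a[4] = {1, -1, 1, -1};
        /* mean of squares is 1, so this prints 1.000000 */
        printf("%f\n", mse_array(a, 4));
        return 0;
    }
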
diff --git a/src/utils.h b/src/utils.h
index ee26d35..b1a0587 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -22,6 +22,7 @@
 void translate_array(float *a, int n, float s);
 int max_index(float *a, int n);
 float constrain(float a, float max);
+float mse_array(float *a, int n);
 float rand_normal();
 float rand_uniform();
 float sum_array(float *a, int n);

--
Gitblit v1.10.0