From cb1f33c6ae840e8dc0f43518daf76e6ed01034f0 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Mon, 08 Dec 2014 19:48:57 +0000
Subject: [PATCH] Fixed race condition in server

---
 src/cnn.c |  117 +++++++++++++++++++++++++++++++++++++++++++++++++++-------
 1 files changed, 102 insertions(+), 15 deletions(-)

diff --git a/src/cnn.c b/src/cnn.c
index 46248ed..f40e9a9 100644
--- a/src/cnn.c
+++ b/src/cnn.c
@@ -8,6 +8,7 @@
 #include "matrix.h"
 #include "utils.h"
 #include "mini_blas.h"
+#include "server.h"
 
 #include <time.h>
 #include <stdlib.h>
@@ -370,15 +371,52 @@
     }
 }
 
+/* Distributed ImageNet client: forever loads a random batch, trains on it and calls
+ * client_update(net, address) (GPU builds only), and calls save_network every 10th pass. */
+void train_imagenet_distributed(char *address)
+{
+    float avg_loss = 1;
+    srand(time(0));
+    network net = parse_network_cfg("cfg/alexnet.client");
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    int imgs = 1; /* multiplier on net.batch for each load_data_random request */
+    int i = 0;
+    char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list");
+    list *plist = get_paths("/data/imagenet/cls.train.list");
+    char **paths = (char **)list_to_array(plist);
+    printf("%d\n", plist->size);
+    clock_t start; /* deliberately not named `time`, which would shadow time() */
+    while(1){
+        i += 1;
+        start=clock();
+        data train = load_data_random(imgs*net.batch, paths, plist->size, labels, 1000, 256, 256);
+        normalize_data_rows(train);
+        printf("Loaded: %lf seconds\n", sec(clock()-start));
+        start=clock();
+#ifdef GPU
+        float loss = train_network_data_gpu(net, train, imgs);
+        client_update(net, address);
+        avg_loss = avg_loss*.9 + loss*.1; /* exponentially weighted running average */
+        printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-start), i*imgs*net.batch);
+#endif
+        free_data(train);
+        if(i%10==0){
+            char buff[256];
+            snprintf(buff, sizeof buff, "/home/pjreddie/imagenet_backup/alexnet_%d.cfg", i); /* bounded write */
+            save_network(net, buff);
+        }
+    }
+}
 
 void train_imagenet()
 {
     float avg_loss = 1;
     //network net = parse_network_cfg("/home/pjreddie/imagenet_backup/alexnet_1270.cfg");
-    network net = parse_network_cfg("cfg/alexnet.part");
+    srand(time(0));
+    network net = parse_network_cfg("cfg/alexnet.cfg");
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int imgs = 1000/net.batch+1;
-    srand(time(0));
+    //imgs=1;
     int i = 0;
     char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list");
     list *plist = get_paths("/data/imagenet/cls.train.list");
@@ -450,7 +488,7 @@
         for(c = 0; c < 8; ++c){
             j = (r*8 + c) * 5;
             printf("Prob: %f\n", box[j]);
-            if(box[j] > .05){
+            if(box[j] > .01){
                 int d = 256/8;
                 int y = r*d+box[j+1]*d;
                 int x = c*d+box[j+2]*d;
@@ -613,19 +651,43 @@
     network net = parse_network_cfg("cfg/nist.cfg");
     data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
     data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10);
-    translate_data_rows(train, -144);
-    translate_data_rows(test, -144);
+    normalize_data_rows(train);
+    normalize_data_rows(test);
     int count = 0;
     int iters = 50000/net.batch;
+    iters = 1000/net.batch + 1;
     while(++count <= 2000){
         clock_t start = clock(), end;
-        float loss = train_network_sgd(net, train, iters);
+        float loss = train_network_sgd_gpu(net, train, iters);
         end = clock();
-        float test_acc = network_accuracy(net, test);
+        float test_acc = network_accuracy_gpu(net, test);
+        //float test_acc = 0;
         printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC);
     }
 }
 
+/* Distributed MNIST client: for 2000 rounds, calls train_network_sgd_gpu(net, train, iters)
+ * and then client_update(net, address), printing the loss and elapsed CPU time per round. */
+void train_nist_distributed(char *address)
+{
+    srand(time(0));
+    network net = parse_network_cfg("cfg/nist.client");
+    data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10);
+    normalize_data_rows(train);
+    int count = 0;
+    /* presumably ~1000 examples per round if one iteration consumes net.batch rows -- TODO confirm */
+    int iters = 1000/net.batch + 1;
+    while(++count <= 2000){
+        clock_t start = clock(), end;
+        float loss = train_network_sgd_gpu(net, train, iters);
+        client_update(net, address);
+        /* The timer stops after client_update, so the reported time includes that call. */
+        end = clock();
+        /* Divide in double: it is what %lf receives anyway and avoids float rounding of the tick count. */
+        printf("%d: Loss: %f, Time: %lf seconds\n", count, loss, (double)(end-start)/CLOCKS_PER_SEC);
+    }
+}
+
 void test_ensemble()
 {
     int i;
@@ -715,6 +777,7 @@
     printf("%d, %d, %d\n", train.X.rows, split[0].X.rows, split[1].X.rows);
 }
 
+/*
 void test_im2row()
 {
     int h = 20;
@@ -734,6 +797,7 @@
         //image render = float_to_image(mh, mw, mc, matrix);
     }
 }
+*/
 
 void flip_network()
 {
@@ -830,15 +894,30 @@
 #endif
 }
 
-void test_server()
+void run_server() /* Parses cfg/nist.server and hands the resulting network to server_update(). */
 {
-    network net = parse_network_cfg("cfg/alexnet.test");
+    srand(time(0)); /* seed rand() from the current time before the config is parsed */
+    network net = parse_network_cfg("cfg/nist.server"); /* server-side config; clients use their own .client cfg */
     server_update(net);
 }
 void test_client()
 {
-    network net = parse_network_cfg("cfg/alexnet.test");
-    client_update(net);
+    network net = parse_network_cfg("cfg/alexnet.client");
+    clock_t start = clock(); /* not named `time`, which would shadow time() from <time.h> */
+    int round;
+    /* Smoke test: call client_update against localhost three times, printing a counter after each. */
+    for(round = 1; round <= 3; ++round){
+        client_update(net, "localhost");
+        printf("%d\n", round);
+    }
+    printf("Transferred: %lf seconds\n", sec(clock()-start));
+}
+
+int find_int_arg(int argc, char* argv[], char *arg) /* integer following the first argv entry equal to `arg`, else 0 */
+{
+    int i; /* the scan stops at argc-1 so argv[i+1] is always a real argument */
+    for(i = 0; i < argc-1; ++i) if(0==strcmp(argv[i], arg)) return (int)strtol(argv[i+1], NULL, 10); /* unlike atoi, defined on out-of-range text */
+    return 0;
 }
 
 int main(int argc, char *argv[])
@@ -847,20 +926,28 @@
         fprintf(stderr, "usage: %s <function>\n", argv[0]);
         return 0;
     }
+    int index = find_int_arg(argc, argv, "-i");
+    #ifdef GPU
+    cl_setup(index);
+    #endif
     if(0==strcmp(argv[1], "train")) train_imagenet();
     else if(0==strcmp(argv[1], "detection")) train_detection_net();
     else if(0==strcmp(argv[1], "asirra")) train_asirra();
     else if(0==strcmp(argv[1], "nist")) train_nist();
     else if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet();
     else if(0==strcmp(argv[1], "test")) test_imagenet();
-    else if(0==strcmp(argv[1], "server")) test_server();
-    else if(0==strcmp(argv[1], "client")) test_client();
+    else if(0==strcmp(argv[1], "server")) run_server();
     else if(0==strcmp(argv[1], "detect")) test_detection();
-    else if(0==strcmp(argv[1], "visualize")) test_visualize(argv[2]);
-    else if(0==strcmp(argv[1], "valid")) validate_imagenet(argv[2]);
 #ifdef GPU
     else if(0==strcmp(argv[1], "test_gpu")) test_gpu_blas();
 #endif
+    else if(argc < 3){
+        fprintf(stderr, "usage: %s <function>\n", argv[0]);
+        return 0;
+    }
+    else if(0==strcmp(argv[1], "client")) train_nist_distributed(argv[2]);
+    else if(0==strcmp(argv[1], "visualize")) test_visualize(argv[2]);
+    else if(0==strcmp(argv[1], "valid")) validate_imagenet(argv[2]);
     fprintf(stderr, "Success!\n");
     return 0;
 }

--
Gitblit v1.10.0