From 28e21152728cbea617948671df064ec75c7953e5 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Sun, 07 Dec 2014 08:41:26 +0000
Subject: [PATCH] Distributed training
---
src/cnn.c | 66 ++++++++++++++++++++++++++++----
1 files changed, 57 insertions(+), 9 deletions(-)
diff --git a/src/cnn.c b/src/cnn.c
index 46248ed..7971b95 100644
--- a/src/cnn.c
+++ b/src/cnn.c
@@ -8,6 +8,7 @@
#include "matrix.h"
#include "utils.h"
#include "mini_blas.h"
+#include "server.h"
#include <time.h>
#include <stdlib.h>
@@ -370,15 +371,52 @@
}
}
+void train_imagenet_distributed(char *address) // Client-side training loop: trains on randomly loaded batches forever and calls client_update(net, address) after every GPU step; never returns.
+{ // address is only forwarded to client_update(); main passes argv[2] - NOTE(review): confirm callers never pass NULL.
+ float avg_loss = 1; // Starting value of the smoothed loss that is updated inside the loop.
+ srand(0); // Constant seed, so rand() yields the same sequence on every run.
+ network net = parse_network_cfg("cfg/alexnet.client"); // Network definition comes from the client-specific config.
+ printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+ int imgs = 1000/net.batch+1; // Overwritten on the next line, so this value is never used.
+ imgs = 1; // Each iteration therefore loads imgs*net.batch == net.batch samples.
+ int i = 0; // Iteration counter; incremented at the top of the loop, so the first iteration is 1.
+ char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list");
+ list *plist = get_paths("/data/imagenet/cls.train.list"); // NOTE(review): labels are read from /home/pjreddie/data/... but the train list from /data/... - confirm both roots are intended.
+ char **paths = (char **)list_to_array(plist); // Array form of the path list, passed to load_data_random() below.
+ printf("%d\n", plist->size); // Prints the size field of the path list.
+ clock_t time; // Local timestamp; shadows time() from <time.h> inside this function.
+ while(1){ // No exit condition: runs until the process is stopped.
+ i += 1;
+ time=clock();
+ data train = load_data_random(imgs*net.batch, paths, plist->size, labels, 1000, 256, 256); // NOTE(review): 1000, 256, 256 are presumably class count and image height/width - confirm against load_data_random().
+ //translate_data_rows(train, -144);
+ normalize_data_rows(train);
+ printf("Loaded: %lf seconds\n", sec(clock()-time)); // Time spent since the clock() call at the top of the iteration (load + normalize).
+ time=clock(); // Restart the timer for the training step.
+#ifdef GPU // Without GPU defined the loop only loads, frees and checkpoints; no training or client_update() happens.
+ float loss = train_network_data_gpu(net, train, imgs);
+ client_update(net, address); // Called once after every training step, before the loss is logged.
+ avg_loss = avg_loss*.9 + loss*.1; // Exponential moving average: 90% previous value, 10% current loss.
+ printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs*net.batch); // Last field is the cumulative sample count, i*imgs*net.batch.
+#endif
+ free_data(train); // Batch is released every iteration, with or without GPU.
+ if(i%10==0){ // Checkpoint every 10th iteration.
+ char buff[256]; // Ample for the fixed path prefix plus a decimal int and ".cfg".
+ sprintf(buff, "/home/pjreddie/imagenet_backup/alexnet_%d.cfg", i);
+ save_network(net, buff); // Hands the per-iteration checkpoint path alexnet_<i>.cfg to save_network().
+ }
+ }
+}
void train_imagenet()
{
float avg_loss = 1;
//network net = parse_network_cfg("/home/pjreddie/imagenet_backup/alexnet_1270.cfg");
- network net = parse_network_cfg("cfg/alexnet.part");
+ srand(0);
+ network net = parse_network_cfg("cfg/alexnet.cfg");
printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
int imgs = 1000/net.batch+1;
- srand(time(0));
+ imgs=1;
int i = 0;
char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list");
list *plist = get_paths("/data/imagenet/cls.train.list");
@@ -450,7 +488,7 @@
for(c = 0; c < 8; ++c){
j = (r*8 + c) * 5;
printf("Prob: %f\n", box[j]);
- if(box[j] > .05){
+ if(box[j] > .01){
int d = 256/8;
int y = r*d+box[j+1]*d;
int x = c*d+box[j+2]*d;
@@ -715,6 +753,7 @@
printf("%d, %d, %d\n", train.X.rows, split[0].X.rows, split[1].X.rows);
}
+/*
void test_im2row()
{
int h = 20;
@@ -734,6 +773,7 @@
//image render = float_to_image(mh, mw, mc, matrix);
}
}
+*/
void flip_network()
{
@@ -830,15 +870,23 @@
#endif
}
-void test_server()
+void run_server() // Loads cfg/alexnet.server and hands the network to server_update(); selected by the "server" command in main.
{
- network net = parse_network_cfg("cfg/alexnet.test");
+ srand(0); // Constant seed, the same srand(0) call this patch adds to the training functions.
+ network net = parse_network_cfg("cfg/alexnet.server"); // Server-specific config replaces the former cfg/alexnet.test.
server_update(net);
}
void test_client()
{
- network net = parse_network_cfg("cfg/alexnet.test");
- client_update(net);
+ network net = parse_network_cfg("cfg/alexnet.client"); // Same config file that train_imagenet_distributed() loads.
+ clock_t time=clock(); // Start timestamp for the whole sequence; the local name shadows time() from <time.h>.
+ client_update(net, "localhost"); // Three back-to-back updates against a hard-coded "localhost" address.
+ printf("1\n"); // The printed 1/2/3 mark completion of each client_update() call.
+ client_update(net, "localhost");
+ printf("2\n");
+ client_update(net, "localhost");
+ printf("3\n");
+ printf("Transfered: %lf seconds\n", sec(clock()-time)); // Total time across all three calls. NOTE(review): the "client" command shown in main now calls train_imagenet_distributed(), so confirm whether test_client() still has a caller.
}
int main(int argc, char *argv[])
@@ -853,8 +901,8 @@
else if(0==strcmp(argv[1], "nist")) train_nist();
else if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet();
else if(0==strcmp(argv[1], "test")) test_imagenet();
- else if(0==strcmp(argv[1], "server")) test_server();
- else if(0==strcmp(argv[1], "client")) test_client();
+ else if(0==strcmp(argv[1], "server")) run_server();
+ else if(0==strcmp(argv[1], "client")) train_imagenet_distributed(argv[2]);
else if(0==strcmp(argv[1], "detect")) test_detection();
else if(0==strcmp(argv[1], "visualize")) test_visualize(argv[2]);
else if(0==strcmp(argv[1], "valid")) validate_imagenet(argv[2]);
--
Gitblit v1.10.0