From b5936b499abc94c0efffbcc99b5698574b59d860 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Sat, 05 Sep 2015 00:52:44 +0000
Subject: [PATCH] lots of stuff

---
 src/network.c          |   37 ++
 src/yolo.c             |    3 
 src/cost_layer.c       |    7 
 src/utils.h            |    2 
 src/network.h          |   16 +
 Makefile               |    6 
 src/network_kernels.cu |    9 
 src/data.c             |   57 ++++-
 src/dice.c             |    5 
 src/data.h             |    2 
 src/image.c            |    5 
 src/coco.c             |   37 ++
 src/writing.c          |    5 
 src/imagenet.c         |   17 
 src/parser.c           |   39 ++
 src/captcha.c          |    6 
 cfg/darknet.cfg        |   21 +
 src/darknet.c          |    5 
 src/compare.c          |  303 +++++++++++++++++++++++++++
 src/cost_layer.h       |    2 
 src/utils.c            |   23 ++
 21 files changed, 529 insertions(+), 78 deletions(-)

diff --git a/Makefile b/Makefile
index 116d3bc..65264de 100644
--- a/Makefile
+++ b/Makefile
@@ -1,5 +1,5 @@
-GPU=0
-OPENCV=0
+GPU=1
+OPENCV=1
 DEBUG=0
 
 ARCH= --gpu-architecture=compute_20 --gpu-code=compute_20
@@ -34,7 +34,7 @@
 LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
 endif
 
-OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o region_layer.o layer.o
+OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o region_layer.o layer.o compare.o
 ifeq ($(GPU), 1) 
 OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
 endif
diff --git a/cfg/darknet.cfg b/cfg/darknet.cfg
index 2e0a624..f52ff3f 100644
--- a/cfg/darknet.cfg
+++ b/cfg/darknet.cfg
@@ -1,12 +1,15 @@
 [net]
 batch=128
-subdivisions=32
+subdivisions=1
 height=256
 width=256
 channels=3
-learning_rate=0.01
 momentum=0.9
 decay=0.0005
+learning_rate=0.01
+policy=poly
+power=.5
+max_batches=600000
 
 [crop]
 crop_height=224
@@ -24,8 +27,8 @@
 activation=leaky
 
 [maxpool]
+size=3
 stride=2
-size=2
 
 [convolutional]
 filters=32
@@ -35,8 +38,8 @@
 activation=leaky
 
 [maxpool]
+size=3
 stride=2
-size=2
 
 [convolutional]
 filters=64
@@ -46,8 +49,8 @@
 activation=leaky
 
 [maxpool]
+size=3
 stride=2
-size=2
 
 [convolutional]
 filters=128
@@ -57,8 +60,8 @@
 activation=leaky
 
 [maxpool]
+size=3
 stride=2
-size=2
 
 [convolutional]
 filters=256
@@ -68,8 +71,8 @@
 activation=leaky
 
 [maxpool]
+size=3
 stride=2
-size=2
 
 [convolutional]
 filters=512
@@ -79,8 +82,8 @@
 activation=leaky
 
 [maxpool]
+size=3
 stride=2
-size=2
 
 [convolutional]
 filters=1024
@@ -96,7 +99,7 @@
 
 [connected]
 output=1000
-activation=leaky
+activation=linear
 
 [softmax]
 
diff --git a/src/captcha.c b/src/captcha.c
index 68d8915..4e77ce2 100644
--- a/src/captcha.c
+++ b/src/captcha.c
@@ -38,9 +38,8 @@
         load_weights(&net, weightfile);
     }
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    //net.seen=0;
     int imgs = 1024;
-    int i = net.seen/imgs;
+    int i = *net.seen/imgs;
     int solved = 1;
     list *plist;
     char **labels = get_labels("/data/captcha/reimgs.labels.list");
@@ -85,10 +84,9 @@
         printf("Loaded: %lf seconds\n", sec(clock()-time));
         time=clock();
         float loss = train_network(net, train);
-        net.seen += imgs;
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
-        printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen);
+        printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen);
         free_data(train);
         if(i%100==0){
             char buff[256];
diff --git a/src/coco.c b/src/coco.c
index 62ae429..87f3dca 100644
--- a/src/coco.c
+++ b/src/coco.c
@@ -62,7 +62,7 @@
     }
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int imgs = 128;
-    int i = net.seen/imgs;
+    int i = *net.seen/imgs;
     data train, buffer;
 
 
@@ -107,7 +107,6 @@
 
         time=clock();
         float loss = train_network(net, train);
-        net.seen += imgs;
         if (avg_loss < 0) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
 
@@ -253,8 +252,9 @@
     int total = 0;
     int correct = 0;
     float avg_iou = 0;
-    int nms = 0;
+    int nms = 1;
     int proposals = 0;
+    int save = 1;
 
     for (i = 0; i < N; ++i) {
         char *path = paths[i];
@@ -277,6 +277,27 @@
         for(k = 0; k < num_boxes*num_boxes*num; ++k){
             if(probs[k][0] > thresh){
                 ++proposals;
+                if(save){
+                    char buff[256];
+                    sprintf(buff, "/data/extracted/nms_preds/%d", proposals);
+                    int dx = (boxes[k].x - boxes[k].w/2) * orig.w;
+                    int dy = (boxes[k].y - boxes[k].h/2) * orig.h;
+                    int w = boxes[k].w * orig.w;
+                    int h = boxes[k].h * orig.h;
+                    image cropped = crop_image(orig, dx, dy, w, h);
+                    image sized = resize_image(cropped, 224, 224);
+#ifdef OPENCV
+                    save_image_jpg(sized, buff);
+#endif
+                    free_image(sized);
+                    free_image(cropped);
+                    sprintf(buff, "/data/extracted/nms_pred_boxes/%d.txt", proposals);
+                    char *im_id = basecfg(path);
+                    FILE *fp = fopen(buff, "w");
+                    fprintf(fp, "%s %d %d %d %d\n", im_id, dx, dy, dx+w, dy+h);
+                    fclose(fp);
+                    free(im_id);
+                }
             }
         }
         for (j = 0; j < num_labels; ++j) {
@@ -332,7 +353,7 @@
     int k;
 
     int count = 0;
-    float iou_thresh = .1;
+    float iou_thresh = .3;
 
     for (i = 0; i < N; ++i) {
         fprintf(stderr, "%5d %5d\n", i, count);
@@ -361,7 +382,7 @@
                 if (iou > iou_thresh){
                     if (!overlaps) {
                         char buff[256];
-                        sprintf(buff, "/home/pjreddie/extracted/labels/%d.txt", count);
+                        sprintf(buff, "/data/extracted/labels/%d.txt", count);
                         label = fopen(buff, "w");
                         overlaps = 1;
                     }
@@ -370,16 +391,16 @@
             }
             if (overlaps) {
                 char buff[256];
-                sprintf(buff, "/home/pjreddie/extracted/imgs/%d", count++);
+                sprintf(buff, "/data/extracted/imgs/%d", count++);
                 int dx = (boxes[k].x - boxes[k].w/2) * orig.w;
                 int dy = (boxes[k].y - boxes[k].h/2) * orig.h;
                 int w = boxes[k].w * orig.w;
                 int h = boxes[k].h * orig.h;
                 image cropped = crop_image(orig, dx, dy, w, h);
                 image sized = resize_image(cropped, 224, 224);
-                #ifdef OPENCV
+#ifdef OPENCV
                 save_image_jpg(sized, buff);
-                #endif
+#endif
                 free_image(sized);
                 free_image(cropped);
                 fclose(label);
diff --git a/src/compare.c b/src/compare.c
new file mode 100644
index 0000000..9b6d6bf
--- /dev/null
+++ b/src/compare.c
@@ -0,0 +1,303 @@
+#include <stdio.h>
+
+#include "network.h"
+#include "detection_layer.h"
+#include "cost_layer.h"
+#include "utils.h"
+#include "parser.h"
+#include "box.h"
+
+void train_compare(char *cfgfile, char *weightfile)
+{
+    /* Train the comparison network from cfgfile (optionally resuming from weightfile); the loop never exits, so the cleanup after it is unreachable. */
+    data_seed = time(0);
+    srand(time(0));
+    float avg_loss = -1;
+    char *base = basecfg(cfgfile);
+    char *backup_directory = "/home/pjreddie/backup/";
+    printf("%s\n", base);
+    network net = parse_network_cfg(cfgfile);
+    if(weightfile){
+        load_weights(&net, weightfile);
+    }
+    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    int imgs = 1024;
+    list *plist = get_paths("data/compare.train.list");
+    char **paths = (char **)list_to_array(plist);
+    int N = plist->size;
+    printf("%d\n", N);
+    clock_t time;
+    pthread_t load_thread;
+    data train;
+    data buffer;
+
+    load_args args = {0};
+    args.w = net.w;
+    args.h = net.h;
+    args.paths = paths;
+    args.classes = 20;
+    args.n = imgs;
+    args.m = N;
+    args.d = &buffer;
+    args.type = COMPARE_DATA;
+    /* Double buffering: the next load is started into `buffer` while the network trains on `train`. */
+    load_thread = load_data_in_thread(args);
+    int epoch = *net.seen/N;
+    int i = 0;
+    while(1){
+        ++i;
+        time=clock();
+        pthread_join(load_thread, 0);
+        train = buffer;
+        load_thread = load_data_in_thread(args);
+        printf("Loaded: %lf seconds\n", sec(clock()-time));
+        time=clock();
+        float loss = train_network(net, train);
+        if(avg_loss == -1) avg_loss = loss;
+        avg_loss = avg_loss*.9 + loss*.1;
+        printf("%.3f: %f, %f avg, %lf seconds, %d images\n", (float)*net.seen/N, loss, avg_loss, sec(clock()-time), *net.seen);
+        free_data(train);
+        if(i%100 == 0){   /* intermediate checkpoint every 100 iterations within an epoch */
+            char buff[256];
+            snprintf(buff, sizeof(buff), "%s/%s_%d_minor_%d.weights",backup_directory,base, epoch, i);
+            save_weights(net, buff);
+        }
+        if(*net.seen/N > epoch){   /* crossed an epoch boundary: checkpoint and restart the minor counter */
+            epoch = *net.seen/N;
+            i = 0;
+            char buff[256];
+            snprintf(buff, sizeof(buff), "%s/%s_%d.weights",backup_directory,base, epoch);
+            save_weights(net, buff);
+            if(epoch%22 == 0) net.learning_rate *= .1;
+        }
+    }
+    pthread_join(load_thread, 0);
+    free_data(buffer);
+    free_network(net);
+    free_ptrs((void**)paths, plist->size);
+    free_list(plist);
+    free(base);
+}
+
+void validate_compare(char *filename, char *weightfile)
+{
+    /* Print running pairwise-ordering accuracy over data/compare.val.list, evaluated in `splits` chunks. */
+    int i = 0;
+    network net = parse_network_cfg(filename);
+    if(weightfile){
+        load_weights(&net, weightfile);
+    }
+    srand(time(0));
+
+    list *plist = get_paths("data/compare.val.list");
+    char **paths = (char **)list_to_array(plist);
+    int N = plist->size/2;   /* each sample is built from two paths */
+    free_list(plist);
+
+    clock_t time;
+    int correct = 0;
+    int total = 0;
+    int splits = 10;
+    int num = (i+1)*N/splits - i*N/splits;
+    data val, buffer;
+
+    load_args args = {0};
+    args.w = net.w;
+    args.h = net.h;
+    args.paths = paths;
+    args.classes = 20;
+    args.n = num;
+    args.m = 0;   /* m == 0 makes load_data_compare read paths in order instead of sampling */
+    args.d = &buffer;
+    args.type = COMPARE_DATA;
+
+    pthread_t load_thread = load_data_in_thread(args);
+    for(i = 1; i <= splits; ++i){
+        time=clock();
+        pthread_join(load_thread, 0);
+        val = buffer;
+
+        /* NOTE(review): a sample reads two consecutive paths but `part` advances one path per sample, so chunks overlap - confirm the intended offset. */
+        num = (i+1)*N/splits - i*N/splits;
+        char **part = paths+(i*N/splits);
+        if(i != splits){
+            args.n = num;   /* chunk sizes differ when N is not a multiple of splits */
+            args.paths = part;
+            load_thread = load_data_in_thread(args);
+        }
+        printf("Loaded: %d images in %lf seconds\n", val.X.rows, sec(clock()-time));
+
+        time=clock();
+        matrix pred = network_predict_data(net, val);
+        int j,k;
+        for(j = 0; j < val.y.rows; ++j){
+            for(k = 0; k < 20; ++k){
+                if(val.y.vals[j][k*2] != val.y.vals[j][k*2+1]){   /* only score classes whose two labels differ */
+                    ++total;
+                    if((val.y.vals[j][k*2] < val.y.vals[j][k*2+1]) == (pred.vals[j][k*2] < pred.vals[j][k*2+1])){
+                        ++correct;
+                    }
+                }
+            }
+        }
+        free_matrix(pred);
+        printf("%d: Acc: %f, %lf seconds, %d images\n", i, (float)correct/total, sec(clock()-time), val.X.rows);
+        free_data(val);
+    }
+}
+
+typedef struct {        /* one entry ranked by the sort / tournament routines in this file */
+    network net;        /* network that bbox_comparator runs to compare two entries */
+    char *filename;     /* image path loaded by bbox_comparator */
+    int class;          /* class index whose prediction pair decides a comparison */
+    float elo;          /* rating updated by bbox_fight and ordered by elo_comparator */
+} sortable_bbox;
+
+int total_compares = 0;   /* incremented on every bbox_comparator call; printed by the sort and battle commands */
+
+int elo_comparator(const void*a, const void *b)
+{
+    /* qsort comparator: orders sortable_bbox entries by descending elo. */
+    const sortable_bbox *lhs = (const sortable_bbox *)a;
+    const sortable_bbox *rhs = (const sortable_bbox *)b;
+    if(lhs->elo > rhs->elo) return -1;
+    return (lhs->elo == rhs->elo) ? 0 : 1;
+}
+
+int bbox_comparator(const void *a, const void *b)
+{
+    /* Compare two sortable_bbox entries by running the network on both images; returns 1 or -1, never 0. */
+    ++total_compares;
+    sortable_bbox box1 = *(sortable_bbox*)a;
+    sortable_bbox box2 = *(sortable_bbox*)b;
+    network net = box1.net;
+    int class   = box1.class;
+    image im1 = load_image_color(box1.filename, net.w, net.h);
+    image im2 = load_image_color(box2.filename, net.w, net.h);
+    /* NOTE(review): X has room for net.w*net.h*net.c floats yet receives two images; assumes net.c spans both (e.g. 6 channels) - confirm. */
+    float *X  = calloc(net.w*net.h*net.c, sizeof(float));
+    /* memcpy takes a byte count, so the element counts must be scaled by sizeof(float). */
+    memcpy(X,                   im1.data, im1.w*im1.h*im1.c*sizeof(float));
+    memcpy(X+im1.w*im1.h*im1.c, im2.data, im2.w*im2.h*im2.c*sizeof(float));
+    float *predictions = network_predict(net, X);
+    free_image(im1);
+    free_image(im2);
+    free(X);
+    if (predictions[class*2] > predictions[class*2+1]){
+        return 1;
+    }
+    return -1;
+}
+
+void bbox_fight(sortable_bbox *a, sortable_bbox *b)
+{
+    /* One Elo match: bbox_comparator decides the winner, then both ratings move by 32*(score - expected). */
+    const int k_factor = 32;
+    const int outcome = bbox_comparator(a,b);
+    const float expect_a = 1./(1+pow(10, (b->elo - a->elo)/400.));
+    const float expect_b = 1./(1+pow(10, (a->elo - b->elo)/400.));
+    const float score_a = (outcome > 0), score_b = (outcome < 0);
+    a->elo += k_factor*(score_a - expect_a);
+    b->elo += k_factor*(score_b - expect_b);
+}
+
+void SortMaster3000(char *filename, char *weightfile)
+{
+    /* Sort the images listed in data/compare.sort.list with qsort + bbox_comparator and print them in the resulting order. */
+    int i = 0;
+    network net = parse_network_cfg(filename);
+    if(weightfile){
+        load_weights(&net, weightfile);
+    }
+    srand(time(0));
+    set_batch_network(&net, 1);
+
+    list *plist = get_paths("data/compare.sort.list");
+    char **paths = (char **)list_to_array(plist);
+    int N = plist->size;
+    free_list(plist);
+    sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox));
+    printf("Sorting %d boxes...\n", N);
+    for(i = 0; i < N; ++i){
+        boxes[i].filename = paths[i];
+        boxes[i].net = net;
+        boxes[i].class = 7;   /* class index is hard-coded here */
+        boxes[i].elo = 1500;
+    }
+    clock_t time=clock();
+    qsort(boxes, N, sizeof(sortable_bbox), bbox_comparator);   /* every comparison runs the network; it never reports equality */
+    for(i = 0; i < N; ++i){
+        printf("%s\n", boxes[i].filename);
+    }
+    printf("Sorted in %d compares, %f secs\n", total_compares, sec(clock()-time));
+}
+
+void BattleRoyaleWithCheese(char *filename, char *weightfile)
+{
+    /* Rank the images in data/compare.sort.list with a 40-round Elo tournament, then print the survivors with their ratings. */
+    int i = 0;
+    network net = parse_network_cfg(filename);
+    if(weightfile){
+        load_weights(&net, weightfile);
+    }
+    srand(time(0));
+    set_batch_network(&net, 1);
+
+    list *plist = get_paths("data/compare.sort.list");
+    char **paths = (char **)list_to_array(plist);
+    int N = plist->size;
+    free_list(plist);
+    sortable_bbox *boxes = calloc(N, sizeof(sortable_bbox));
+    printf("Battling %d boxes...\n", N);
+    for(i = 0; i < N; ++i){
+        boxes[i].filename = paths[i];
+        boxes[i].net = net;
+        boxes[i].class = 7;   /* class index is hard-coded here */
+        boxes[i].elo = 1500;  /* every entry starts from the same rating */
+    }
+    int round;
+    clock_t time=clock();
+    for(round = 1; round <= 40; ++round){
+        clock_t round_time=clock();
+        printf("Round: %d\n", round);
+        qsort(boxes, N, sizeof(sortable_bbox), elo_comparator);
+        sorta_shuffle(boxes, N, sizeof(sortable_bbox), 10);   /* presumably a partial shuffle of the sorted order - verify in utils.c */
+        for(i = 0; i < N/2; ++i){   /* neighbours (2i, 2i+1) fight; with odd N the last entry sits out */
+            bbox_fight(boxes+i*2, boxes+i*2+1);
+        }
+        if(round >= 4){   /* from round 4 on, re-sort and shrink N so only the top-rated entries stay in play */
+            qsort(boxes, N, sizeof(sortable_bbox), elo_comparator);
+            if(round == 4){
+                N = N/2;
+            }else{
+                N = (N*9/10)/2*2;   /* keep 90%, rounded down to an even count */
+            }
+        }
+        printf("Round: %f secs, %d remaining\n", sec(clock()-round_time), N);
+    }
+    qsort(boxes, N, sizeof(sortable_bbox), elo_comparator);
+    for(i = 0; i < N; ++i){
+        printf("%s %f\n", boxes[i].filename, boxes[i].elo);
+    }
+    printf("Tournament in %d compares, %f secs\n", total_compares, sec(clock()-time));
+}
+
+void run_compare(int argc, char **argv)
+{
+    /* Dispatch the "compare" sub-commands: argv[2] selects the action, argv[3] is the cfg, argv[4] optional weights. */
+    if(argc < 4){
+        fprintf(stderr, "usage: %s %s [train/valid/sort/battle] [cfg] [weights (optional)]\n", argv[0], argv[1]);
+        return;
+    }
+
+    char *cfg = argv[3];
+    char *weights = (argc > 4) ? argv[4] : 0;
+
+    if(0==strcmp(argv[2], "train")) train_compare(cfg, weights);
+    else if(0==strcmp(argv[2], "valid")) validate_compare(cfg, weights);
+    else if(0==strcmp(argv[2], "sort")) SortMaster3000(cfg, weights);
+    else if(0==strcmp(argv[2], "battle")) BattleRoyaleWithCheese(cfg, weights);
+    else {
+        /* Report an unrecognised action instead of silently doing nothing. */
+        fprintf(stderr, "Not an option: %s\n", argv[2]);
+    }
+}
diff --git a/src/cost_layer.c b/src/cost_layer.c
index d1ae6e5..4ec0ac4 100644
--- a/src/cost_layer.c
+++ b/src/cost_layer.c
@@ -26,12 +26,13 @@
     return "sse";
 }
 
-cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type)
+cost_layer make_cost_layer(int batch, int inputs, COST_TYPE cost_type, float scale)
 {
     fprintf(stderr, "Cost Layer: %d inputs\n", inputs);
     cost_layer l = {0};
     l.type = COST;
 
+    l.scale = scale;
     l.batch = batch;
     l.inputs = inputs;
     l.outputs = inputs;
@@ -61,7 +62,7 @@
 
 void backward_cost_layer(const cost_layer l, network_state state)
 {
-    axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
+    axpy_cpu(l.batch*l.inputs, l.scale, l.delta, 1, state.delta, 1);
 }
 
 #ifdef GPU
@@ -92,7 +93,7 @@
 
 void backward_cost_layer_gpu(const cost_layer l, network_state state)
 {
-    axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1);
+    axpy_ongpu(l.batch*l.inputs, l.scale, l.delta_gpu, 1, state.delta, 1);
 }
 #endif
 
diff --git a/src/cost_layer.h b/src/cost_layer.h
index 0732323..9ad3124 100644
--- a/src/cost_layer.h
+++ b/src/cost_layer.h
@@ -7,7 +7,7 @@
 
 COST_TYPE get_cost_type(char *s);
 char *get_cost_string(COST_TYPE a);
-cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type);
+cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale);
 void forward_cost_layer(const cost_layer l, network_state state);
 void backward_cost_layer(const cost_layer l, network_state state);
 
diff --git a/src/darknet.c b/src/darknet.c
index f87afc6..3709ed1 100644
--- a/src/darknet.c
+++ b/src/darknet.c
@@ -18,6 +18,7 @@
 extern void run_captcha(int argc, char **argv);
 extern void run_nightmare(int argc, char **argv);
 extern void run_dice(int argc, char **argv);
+extern void run_compare(int argc, char **argv);
 
 void change_rate(char *filename, float scale, float add)
 {
@@ -86,7 +87,7 @@
     if(weightfile){
         load_weights_upto(&net, weightfile, max);
     }
-    net.seen = 0;
+    *net.seen = 0;
     save_weights_upto(net, outfile, max);
 }
 
@@ -179,6 +180,8 @@
         run_yolo(argc, argv);
     } else if (0 == strcmp(argv[1], "coco")){
         run_coco(argc, argv);
+    } else if (0 == strcmp(argv[1], "compare")){
+        run_compare(argc, argv);
     } else if (0 == strcmp(argv[1], "dice")){
         run_dice(argc, argv);
     } else if (0 == strcmp(argv[1], "writing")){
diff --git a/src/data.c b/src/data.c
index ec2b304..003338e 100644
--- a/src/data.c
+++ b/src/data.c
@@ -413,8 +413,8 @@
 
 data load_data_compare(int n, char **paths, int m, int classes, int w, int h)
 {
-    char **random_paths = get_random_paths(paths, 2*n, m);
-    int i;
+    if(m) paths = get_random_paths(paths, 2*n, m);
+    int i,j;
     data d;
     d.shallow = 0;
 
@@ -425,20 +425,51 @@
     int k = 2*(classes);
     d.y = make_matrix(n, k);
     for(i = 0; i < n; ++i){
-        image im1 = load_image_color(random_paths[i*2],   w, h);
-        image im2 = load_image_color(random_paths[i*2+1], w, h);
+        image im1 = load_image_color(paths[i*2],   w, h);
+        image im2 = load_image_color(paths[i*2+1], w, h);
 
         d.X.vals[i] = calloc(d.X.cols, sizeof(float));
         memcpy(d.X.vals[i],         im1.data, h*w*3*sizeof(float));
         memcpy(d.X.vals[i] + h*w*3, im2.data, h*w*3*sizeof(float));
 
-        //char *imlabel1 = find_replace(random_paths[i*2],   "imgs", "labels");
-        //char *imlabel2 = find_replace(random_paths[i*2+1], "imgs", "labels");
+        int id;
+        float iou;
+
+        char *imlabel1 = find_replace(paths[i*2],   "imgs", "labels");
+        imlabel1 = find_replace(imlabel1, "jpg", "txt");
+        FILE *fp1 = fopen(imlabel1, "r");
+
+        while(fscanf(fp1, "%d %f", &id, &iou) == 2){
+            if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou;
+        }
+
+        char *imlabel2 = find_replace(paths[i*2+1], "imgs", "labels");
+        imlabel2 = find_replace(imlabel2, "jpg", "txt");
+        FILE *fp2 = fopen(imlabel2, "r");
+
+        while(fscanf(fp2, "%d %f", &id, &iou) == 2){
+            if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou;
+        }
+        
+        for (j = 0; j < classes; ++j){
+            if (d.y.vals[i][2*j] > .5 &&  d.y.vals[i][2*j+1] < .5){
+                d.y.vals[i][2*j] = 1;
+                d.y.vals[i][2*j+1] = 0;
+            } else if (d.y.vals[i][2*j] < .5 &&  d.y.vals[i][2*j+1] > .5){
+                d.y.vals[i][2*j] = 0;
+                d.y.vals[i][2*j+1] = 1;
+            } else {
+                d.y.vals[i][2*j]   = SECRET_NUM;
+                d.y.vals[i][2*j+1] = SECRET_NUM;
+            }
+        }
+        fclose(fp1);
+        fclose(fp2);
 
         free_image(im1);
         free_image(im2);
     }
-    free(random_paths);
+    if(m) free(paths);
     return d;
 }
 
@@ -503,11 +534,11 @@
 
 void *load_thread(void *ptr)
 {
-    
-    #ifdef GPU
-        cudaError_t status = cudaSetDevice(gpu_index);
-        check_error(status);
-    #endif
+
+#ifdef GPU
+    cudaError_t status = cudaSetDevice(gpu_index);
+    check_error(status);
+#endif
 
     printf("Loading data: %d\n", rand_r(&data_seed));
     load_args a = *(struct load_args*)ptr;
@@ -517,6 +548,8 @@
         *a.d = load_data_detection(a.n, a.paths, a.m, a.classes, a.w, a.h, a.num_boxes, a.background);
     } else if (a.type == REGION_DATA){
         *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes);
+    } else if (a.type == COMPARE_DATA){
+        *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h);
     } else if (a.type == IMAGE_DATA){
         *(a.im) = load_image_color(a.path, 0, 0);
         *(a.resized) = resize_image(*(a.im), a.w, a.h);
diff --git a/src/data.h b/src/data.h
index 7c425ba..216ab0c 100644
--- a/src/data.h
+++ b/src/data.h
@@ -26,7 +26,7 @@
 } data;
 
 typedef enum {
-    CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA
+    CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA
 } data_type;
 
 typedef struct load_args{
diff --git a/src/dice.c b/src/dice.c
index 3283fe9..7948741 100644
--- a/src/dice.c
+++ b/src/dice.c
@@ -18,7 +18,7 @@
     }
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int imgs = 1024;
-    int i = net.seen/imgs;
+    int i = *net.seen/imgs;
     char **labels = dice_labels;
     list *plist = get_paths("data/dice/dice.train.list");
     char **paths = (char **)list_to_array(plist);
@@ -32,10 +32,9 @@
 
         time=clock();
         float loss = train_network(net, train);
-        net.seen += imgs;
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
-        printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen);
+        printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen);
         free_data(train);
         if((i % 100) == 0) net.learning_rate *= .1;
         if(i%100==0){
diff --git a/src/image.c b/src/image.c
index fa0bceb..b6d7577 100644
--- a/src/image.c
+++ b/src/image.c
@@ -274,6 +274,8 @@
 #ifdef OPENCV
     void save_image_jpg(image p, char *name)
     {
+        image copy = copy_image(p);
+        rgbgr_image(copy);
         int x,y,k;
 
         char buff[256];
@@ -284,12 +286,13 @@
         for(y = 0; y < p.h; ++y){
             for(x = 0; x < p.w; ++x){
                 for(k= 0; k < p.c; ++k){
-                    disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(p,x,y,k)*255);
+                    disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
                 }
             }
         }
         cvSaveImage(buff, disp,0);
         cvReleaseImage(&disp);
+        free_image(copy);
     }
     #endif
 
diff --git a/src/imagenet.c b/src/imagenet.c
index 5d79483..c826a0f 100644
--- a/src/imagenet.c
+++ b/src/imagenet.c
@@ -19,7 +19,6 @@
         load_weights(&net, weightfile);
     }
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
-    //net.seen=0;
     int imgs = 1024;
     char **labels = get_labels("data/inet.labels.list");
     list *plist = get_paths("/data/imagenet/cls.train.list");
@@ -43,8 +42,8 @@
     args.type = CLASSIFICATION_DATA;
 
     load_thread = load_data_in_thread(args);
-    int epoch = net.seen/N;
-    while(1){
+    int epoch = (*net.seen)/N;
+    while(get_current_batch(net) < net.max_batches || net.max_batches == 0){
         time=clock();
         pthread_join(load_thread, 0);
         train = buffer;
@@ -59,19 +58,21 @@
         printf("Loaded: %lf seconds\n", sec(clock()-time));
         time=clock();
         float loss = train_network(net, train);
-        net.seen += imgs;
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
-        printf("%.3f: %f, %f avg, %lf seconds, %d images\n", (float)net.seen/N, loss, avg_loss, sec(clock()-time), net.seen);
+        printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
         free_data(train);
-        if(net.seen/N > epoch){
-            epoch = net.seen/N;
+        if(*net.seen/N > epoch){
+            epoch = *net.seen/N;
             char buff[256];
             sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
             save_weights(net, buff);
-            if(epoch%22 == 0) net.learning_rate *= .1;
         }
     }
+    char buff[256];
+    sprintf(buff, "%s/%s.weights", backup_directory, base);
+    save_weights(net, buff);
+
     pthread_join(load_thread, 0);
     free_data(buffer);
     free_network(net);
diff --git a/src/network.c b/src/network.c
index 70bcb58..d823c15 100644
--- a/src/network.c
+++ b/src/network.c
@@ -20,6 +20,30 @@
 #include "dropout_layer.h"
 #include "route_layer.h"
 
+int get_current_batch(network net)
+{
+    /* Whole update batches (net.batch*net.subdivisions images each) covered by the *net.seen counter. */
+    return (*net.seen)/(net.batch*net.subdivisions);
+}
+
+float get_current_rate(network net)
+{
+    /* Learning rate in effect for the current batch, derived from net.policy. */
+    const int batches_done = get_current_batch(net);
+    const float base_rate = net.learning_rate;
+
+    if (net.policy == CONSTANT) return base_rate;
+    /* STEP uses integer division, so the exponent only grows once every net.step batches. */
+    if (net.policy == STEP) return base_rate * pow(net.gamma, batches_done/net.step);
+    if (net.policy == EXP) return base_rate * pow(net.gamma, batches_done);
+    if (net.policy == POLY) {
+        return base_rate * pow(1 - (float)batches_done / net.max_batches, net.power);
+    }
+    /* Unrecognised policy value: warn and fall back to the base rate. */
+    fprintf(stderr, "Policy is weird!\n");
+    return base_rate;
+}
+
 char *get_layer_string(LAYER_TYPE a)
 {
     switch(a){
@@ -60,6 +84,7 @@
     network net = {0};
     net.n = n;
     net.layers = calloc(net.n, sizeof(layer));
+    net.seen = calloc(1, sizeof(int));
     #ifdef GPU
     net.input_gpu = calloc(1, sizeof(float *));
     net.truth_gpu = calloc(1, sizeof(float *));
@@ -110,14 +135,15 @@
 {
     int i;
     int update_batch = net.batch*net.subdivisions;
+    float rate = get_current_rate(net);
     for(i = 0; i < net.n; ++i){
         layer l = net.layers[i];
         if(l.type == CONVOLUTIONAL){
-            update_convolutional_layer(l, update_batch, net.learning_rate, net.momentum, net.decay);
+            update_convolutional_layer(l, update_batch, rate, net.momentum, net.decay);
         } else if(l.type == DECONVOLUTIONAL){
-            update_deconvolutional_layer(l, net.learning_rate, net.momentum, net.decay);
+            update_deconvolutional_layer(l, rate, net.momentum, net.decay);
         } else if(l.type == CONNECTED){
-            update_connected_layer(l, update_batch, net.learning_rate, net.momentum, net.decay);
+            update_connected_layer(l, update_batch, rate, net.momentum, net.decay);
         }
     }
 }
@@ -203,6 +229,7 @@
 
 float train_network_datum(network net, float *x, float *y)
 {
+    *net.seen += net.batch;
 #ifdef GPU
     if(gpu_index >= 0) return train_network_datum_gpu(net, x, y);
 #endif
@@ -214,7 +241,7 @@
     forward_network(net, state);
     backward_network(net, state);
     float error = get_network_cost(net);
-    if((net.seen/net.batch)%net.subdivisions == 0) update_network(net);
+    if(((*net.seen)/net.batch)%net.subdivisions == 0) update_network(net);
     return error;
 }
 
@@ -227,7 +254,6 @@
     int i;
     float sum = 0;
     for(i = 0; i < n; ++i){
-        net.seen += batch;
         get_random_batch(d, batch, X, y);
         float err = train_network_datum(net, X, y);
         sum += err;
@@ -248,7 +274,6 @@
     float sum = 0;
     for(i = 0; i < n; ++i){
         get_next_batch(d, batch, i*batch, X, y);
-        net.seen += batch;
         float err = train_network_datum(net, X, y);
         sum += err;
     }
diff --git a/src/network.h b/src/network.h
index 1d960c0..85e5dbc 100644
--- a/src/network.h
+++ b/src/network.h
@@ -7,17 +7,27 @@
 #include "layer.h"
 #include "data.h"
 
+typedef enum {   /* learning-rate schedule selector, interpreted by get_current_rate() */
+    CONSTANT, STEP, EXP, POLY
+} learning_rate_policy;
+
 typedef struct {
     int n;
     int batch;
-    int seen;
+    int *seen;
     int subdivisions;
-    float learning_rate;
     float momentum;
     float decay;
     layer *layers;
     int outputs;
     float *output;
+    learning_rate_policy policy;
+
+    float learning_rate;
+    float gamma;
+    float power;
+    int step;
+    int max_batches;
 
     int inputs;
     int h, w, c;
@@ -38,6 +48,8 @@
 void backward_network_gpu(network net, network_state state);
 #endif
 
+float get_current_rate(network net);
+int get_current_batch(network net);
 void free_network(network net);
 void compare_networks(network n1, network n2, data d);
 char *get_layer_string(LAYER_TYPE a);
diff --git a/src/network_kernels.cu b/src/network_kernels.cu
index a73ddd9..1f0a654 100644
--- a/src/network_kernels.cu
+++ b/src/network_kernels.cu
@@ -116,14 +116,15 @@
 {
     int i;
     int update_batch = net.batch*net.subdivisions;
+    float rate = get_current_rate(net);
     for(i = 0; i < net.n; ++i){
         layer l = net.layers[i];
         if(l.type == CONVOLUTIONAL){
-            update_convolutional_layer_gpu(l, update_batch, net.learning_rate, net.momentum, net.decay);
+            update_convolutional_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
         } else if(l.type == DECONVOLUTIONAL){
-            update_deconvolutional_layer_gpu(l, net.learning_rate, net.momentum, net.decay);
+            update_deconvolutional_layer_gpu(l, rate, net.momentum, net.decay);
         } else if(l.type == CONNECTED){
-            update_connected_layer_gpu(l, update_batch, net.learning_rate, net.momentum, net.decay);
+            update_connected_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
         }
     }
 }
@@ -147,7 +148,7 @@
     forward_network_gpu(net, state);
     backward_network_gpu(net, state);
     float error = get_network_cost(net);
-    if ((net.seen / net.batch) % net.subdivisions == 0) update_network_gpu(net);
+    if (((*net.seen) / net.batch) % net.subdivisions == 0) update_network_gpu(net);
 
     return error;
 }
diff --git a/src/parser.c b/src/parser.c
index ad324e9..b9f6cb6 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -189,7 +189,8 @@
 {
     char *type_s = option_find_str(options, "type", "sse");
     COST_TYPE type = get_cost_type(type_s);
-    cost_layer layer = make_cost_layer(params.batch, params.inputs, type);
+    float scale = option_find_float_quiet(options, "scale",1);
+    cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
     return layer;
 }
 
@@ -305,6 +306,31 @@
     return layer;
 }
 
+/*
+ * Map the cfg "policy" string onto a learning_rate_policy value.
+ * Accepted spellings are "poly", "constant", "step" and "exp" (exact,
+ * case-sensitive matches). Any other string is reported on stderr and
+ * CONSTANT is returned.
+ */
+learning_rate_policy get_policy(char *s)
+{
+    static const struct {
+        const char *name;
+        learning_rate_policy policy;
+    } known[] = {
+        {"poly",     POLY},
+        {"constant", CONSTANT},
+        {"step",     STEP},
+        {"exp",      EXP}
+    };
+    size_t k;
+    for(k = 0; k < sizeof(known)/sizeof(known[0]); ++k){
+        if (!strcmp(s, known[k].name)) return known[k].policy;
+    }
+    fprintf(stderr, "Couldn't find policy %s, going with constant\n", s);
+    return CONSTANT;
+}
+
 void parse_net_options(list *options, network *net)
 {
     net->batch = option_find_int(options, "batch",1);
@@ -319,7 +330,20 @@
     net->w = option_find_int_quiet(options, "width",0);
     net->c = option_find_int_quiet(options, "channels",0);
     net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
+
     if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
+
+    char *policy_s = option_find_str(options, "policy", "constant");
+    net->policy = get_policy(policy_s);
+    if(net->policy == STEP){
+        net->step = option_find_int(options, "step", 1);
+        net->gamma = option_find_float(options, "gamma", 1);
+    } else if (net->policy == EXP){
+        net->gamma = option_find_float(options, "gamma", 1);
+    } else if (net->policy == POLY){
+        net->power = option_find_float(options, "power", 1);
+    }
+    net->max_batches = option_find_int(options, "max_batches", 0);
 }
 
 network parse_network_cfg(char *filename)
@@ -532,7 +556,7 @@
     fwrite(&net.learning_rate, sizeof(float), 1, fp);
     fwrite(&net.momentum, sizeof(float), 1, fp);
     fwrite(&net.decay, sizeof(float), 1, fp);
-    fwrite(&net.seen, sizeof(int), 1, fp);
+    fwrite(net.seen, sizeof(int), 1, fp);
 
     int i,j,k;
     for(i = 0; i < net.n; ++i){
@@ -571,7 +595,7 @@
     fwrite(&net.learning_rate, sizeof(float), 1, fp);
     fwrite(&net.momentum, sizeof(float), 1, fp);
     fwrite(&net.decay, sizeof(float), 1, fp);
-    fwrite(&net.seen, sizeof(int), 1, fp);
+    fwrite(net.seen, sizeof(int), 1, fp);
 
     int i;
     for(i = 0; i < net.n && i < cutoff; ++i){
@@ -620,10 +644,11 @@
     FILE *fp = fopen(filename, "r");
     if(!fp) file_error(filename);
 
-    fread(&net->learning_rate, sizeof(float), 1, fp);
-    fread(&net->momentum, sizeof(float), 1, fp);
-    fread(&net->decay, sizeof(float), 1, fp);
-    fread(&net->seen, sizeof(int), 1, fp);
+    float garbage;
+    fread(&garbage, sizeof(float), 1, fp);
+    fread(&garbage, sizeof(float), 1, fp);
+    fread(&garbage, sizeof(float), 1, fp);
+    fread(net->seen, sizeof(int), 1, fp);
 
     int i;
     for(i = 0; i < net->n && i < cutoff; ++i){
diff --git a/src/utils.c b/src/utils.c
index d54e966..3121ef6 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -8,6 +8,47 @@
 
 #include "utils.h"
 
+/*
+ * Shuffle an array piecewise: the n elements (each `size` bytes) are split
+ * into `sections` contiguous runs whose lengths differ by at most one, and
+ * every run is shuffled on its own, so an element never leaves its run.
+ */
+void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections)
+{
+    char *base = arr;   /* byte pointer: arithmetic on void* is a GNU extension */
+    size_t i;
+    for(i = 0; i < sections; ++i){
+        size_t start = n*i/sections;
+        size_t end = n*(i+1)/sections;
+        size_t num = end-start;
+        shuffle(base+(start*size), num, size);
+    }
+}
+
+/*
+ * Randomly permute the n elements of arr in place, each `size` bytes wide,
+ * drawing indices from rand(). Arrays with fewer than two elements are left
+ * untouched. If the scratch buffer cannot be allocated the array is also
+ * left untouched.
+ */
+void shuffle(void *arr, size_t n, size_t size)
+{
+    char *base = arr;
+    void *swp;
+    size_t i;
+    /* n-1 below would wrap around for n == 0 because size_t is unsigned */
+    if(n < 2) return;
+    swp = calloc(1, size);
+    if(!swp) return;
+    for(i = 0; i < n-1; ++i){
+        size_t j = i + rand()/(RAND_MAX / (n-i)+1);
+        memcpy(swp,           base+(j*size), size);
+        memcpy(base+(j*size), base+(i*size), size);
+        memcpy(base+(i*size), swp,           size);
+    }
+    free(swp);  /* release the scratch buffer before returning */
+}
+
 void del_arg(int argc, char **argv, int index)
 {
     int i;
diff --git a/src/utils.h b/src/utils.h
index 9332702..1b9ba08 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -6,6 +6,8 @@
 
 #define SECRET_NUM -1234
 
+void shuffle(void *arr, size_t n, size_t size);
+void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections);
 void free_ptrs(void **ptrs, int n);
 char *basecfg(char *cfgfile);
 int alphanum_to_int(char c);
diff --git a/src/writing.c b/src/writing.c
index 1c1684b..cfbc5fd 100644
--- a/src/writing.c
+++ b/src/writing.c
@@ -15,7 +15,7 @@
     }
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int imgs = 1024;
-    int i = net.seen/imgs;
+    int i = *net.seen/imgs;
     list *plist = get_paths("figures.list");
     char **paths = (char **)list_to_array(plist);
     printf("%d\n", plist->size);
@@ -44,10 +44,9 @@
         cvWaitKey(0);
         */
 
-        net.seen += imgs;
         if(avg_loss == -1) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
-        printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen);
+        printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), *net.seen);
         free_data(train);
         if((i % 20000) == 0) net.learning_rate *= .1;
         //if(i%100 == 0 && net.learning_rate > .00001) net.learning_rate *= .97;
diff --git a/src/yolo.c b/src/yolo.c
index 9bf96de..61a5344 100644
--- a/src/yolo.c
+++ b/src/yolo.c
@@ -68,7 +68,7 @@
     detection_layer layer = get_network_detection_layer(net);
     printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int imgs = 128;
-    int i = net.seen/imgs;
+    int i = *net.seen/imgs;
 
     char **paths;
     list *plist = get_paths(train_images);
@@ -112,7 +112,6 @@
         printf("Loaded: %lf seconds\n", sec(clock()-time));
         time=clock();
         float loss = train_network(net, train);
-        net.seen += imgs;
         if (avg_loss < 0) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;
 

--
Gitblit v1.10.0