From b3c4fc9f223d9b6f50a1652d8d116fcdcc16f2e8 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Sun, 27 Nov 2016 04:02:46 +0000
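Subject: [PATCH] :fire: ARE YOU NOT ENTERTAINED :fire:

Summary of changes in this patch:

- Add cfg/yolo-tiny.cfg (80 classes, 425 output filters) and
  cfg/yolo-tiny_voc.cfg (20 classes, 125 output filters).
- get_region_boxes() takes a new trailing `int *map` argument. When it is
  non-null, the hierarchy-prediction path fills 200 per-box entries with
  scale * predictions[class_index + map[j]], zeroing values at or below
  thresh. All existing callers pass 0 except validate_detector(), which
  passes its map.
- print_imagenet_detections() no longer takes `map`; the class remapping
  now happens inside get_region_boxes(). The hard-coded +9741 image id
  offset is removed from the call site.
- validate_detector(): output-file setup moves after the network is
  loaded. Per-class result files are opened only for the default eval
  type; the "imagenet" eval type sets classes to 200.
- Detector training: loader threads go from 4 to 8, and the random
  resize dimension is pinned to 544 once the current batch is within 100
  of max_batches.
- region_layer.c: the hierarchy probability used to pick the best match
  is now multiplied by l.output[index-1].
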
---
 cfg/yolo-tiny_voc.cfg |  134 ++++++++++++++++++++++
 src/region_layer.h    |    2 
 src/demo.c            |    2 
 cfg/yolo-tiny.cfg     |  134 ++++++++++++++++++++++
 src/detector.c        |   63 +++++-----
 src/region_layer.c    |   19 +-
 6 files changed, 314 insertions(+), 40 deletions(-)

diff --git a/cfg/yolo-tiny.cfg b/cfg/yolo-tiny.cfg
new file mode 100644
index 0000000..5580098
--- /dev/null
+++ b/cfg/yolo-tiny.cfg
@@ -0,0 +1,134 @@
+[net]
+batch=64
+subdivisions=8
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+max_batches = 120000
+policy=steps
+steps=-1,100,80000,100000
+scales=.1,10,.1,.1
+
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=1
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=425
+activation=linear
+
+[region]
+anchors = 0.738768,0.874946,  2.42204,2.65704,  4.30971,7.04493,  10.246,4.59428,  12.6868,11.8741
+bias_match=1
+classes=80
+coords=4
+num=5
+softmax=1
+jitter=.2
+rescore=1
+
+object_scale=5
+noobject_scale=1
+class_scale=1
+coord_scale=1
+
+absolute=1
+thresh = .6
+random=1
diff --git a/cfg/yolo-tiny_voc.cfg b/cfg/yolo-tiny_voc.cfg
new file mode 100644
index 0000000..1f33c35
--- /dev/null
+++ b/cfg/yolo-tiny_voc.cfg
@@ -0,0 +1,134 @@
+[net]
+batch=64
+subdivisions=8
+width=416
+height=416
+channels=3
+momentum=0.9
+decay=0.0005
+angle=0
+saturation = 1.5
+exposure = 1.5
+hue=.1
+
+learning_rate=0.001
+max_batches = 40100
+policy=steps
+steps=-1,100,20000,30000
+scales=.1,10,.1,.1
+
+[convolutional]
+batch_normalize=1
+filters=16
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=64
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=128
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=256
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=2
+
+[convolutional]
+batch_normalize=1
+filters=512
+size=3
+stride=1
+pad=1
+activation=leaky
+
+[maxpool]
+size=2
+stride=1
+
+[convolutional]
+batch_normalize=1
+filters=1024
+size=3
+stride=1
+pad=1
+activation=leaky
+
+###########
+
+[convolutional]
+batch_normalize=1
+size=3
+stride=1
+pad=1
+filters=1024
+activation=leaky
+
+[convolutional]
+size=1
+stride=1
+pad=1
+filters=125
+activation=linear
+
+[region]
+anchors = 1.08,1.19,  3.42,4.41,  6.63,11.38,  9.42,5.11,  16.62,10.52
+bias_match=1
+classes=20
+coords=4
+num=5
+softmax=1
+jitter=.2
+rescore=1
+
+object_scale=5
+noobject_scale=1
+class_scale=1
+coord_scale=1
+
+absolute=1
+thresh = .6
+random=1
diff --git a/src/demo.c b/src/demo.c
index 915d950..19eaee1 100644
--- a/src/demo.c
+++ b/src/demo.c
@@ -63,7 +63,7 @@
     if(l.type == DETECTION){
         get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
     } else if (l.type == REGION){
-        get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
+        get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0, 0);
     } else {
         error("Last layer must produce detections\n");
     }
diff --git a/src/detector.c b/src/detector.c
index 695b068..31c44c0 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -66,7 +66,7 @@
     args.num_boxes = l.max_boxes;
     args.d = &buffer;
     args.type = DETECTION_DATA;
-    args.threads = 4;
+    args.threads = 8;
 
     args.angle = net.angle;
     args.exposure = net.exposure;
@@ -81,6 +81,7 @@
         if(l.random && count++%10 == 0){
             printf("Resizing\n");
             int dim = (rand() % 10 + 10) * 32;
+            if (get_current_batch(net)+100 > net.max_batches) dim = 544;
             //int dim = (rand() % 4 + 16) * 32;
             printf("%d\n", dim);
             args.w = dim;
@@ -208,7 +209,7 @@
     }
 }
 
-void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int total, int classes, int w, int h, int *map)
+void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int total, int classes, int w, int h)
 {
     int i, j;
     for(i = 0; i < total; ++i){
@@ -224,7 +225,6 @@
 
         for(j = 0; j < classes; ++j){
             int class = j;
-            if (map) class = map[j];
             if (probs[i][class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, probs[i][class],
                     xmin, ymin, xmax, ymax);
         }
@@ -233,6 +233,7 @@
 
 void validate_detector(char *datacfg, char *cfgfile, char *weightfile)
 {
+    int j;
     list *options = read_data_cfg(datacfg);
     char *valid_images = option_find_str(options, "valid", "data/train.list");
     char *name_list = option_find_str(options, "names", "data/names.list");
@@ -242,23 +243,6 @@
     int *map = 0;
     if (mapf) map = read_map(mapf);
 
-
-    char buff[1024];
-    char *type = option_find_str(options, "eval", "voc");
-    FILE *fp = 0;
-    int coco = 0;
-    int imagenet = 0;
-    if(0==strcmp(type, "coco")){
-        snprintf(buff, 1024, "%s/coco_results.json", prefix);
-        fp = fopen(buff, "w");
-        fprintf(fp, "[\n");
-        coco = 1;
-    } else if(0==strcmp(type, "imagenet")){
-        snprintf(buff, 1024, "%s/imagenet-detection.txt", prefix);
-        fp = fopen(buff, "w");
-        imagenet = 1;
-    }
-
     network net = parse_network_cfg(cfgfile);
     if(weightfile){
         load_weights(&net, weightfile);
@@ -274,12 +258,31 @@
     layer l = net.layers[net.n-1];
     int classes = l.classes;
 
-    int j;
-    FILE **fps = calloc(classes, sizeof(FILE *));
-    for(j = 0; j < classes; ++j){
-        snprintf(buff, 1024, "%s/%s%s.txt", prefix, base, names[j]);
-        fps[j] = fopen(buff, "w");
+    char buff[1024];
+    char *type = option_find_str(options, "eval", "voc");
+    FILE *fp = 0;
+    FILE **fps = 0;
+    int coco = 0;
+    int imagenet = 0;
+    if(0==strcmp(type, "coco")){
+        snprintf(buff, 1024, "%s/coco_results.json", prefix);
+        fp = fopen(buff, "w");
+        fprintf(fp, "[\n");
+        coco = 1;
+    } else if(0==strcmp(type, "imagenet")){
+        snprintf(buff, 1024, "%s/imagenet-detection.txt", prefix);
+        fp = fopen(buff, "w");
+        imagenet = 1;
+        classes = 200;
+    } else {
+        fps = calloc(classes, sizeof(FILE *));
+        for(j = 0; j < classes; ++j){
+            snprintf(buff, 1024, "%s/%s%s.txt", prefix, base, names[j]);
+            fps[j] = fopen(buff, "w");
+        }
     }
+
+
     box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
     float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
     for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
@@ -330,12 +333,12 @@
             network_predict(net, X);
             int w = val[t].w;
             int h = val[t].h;
-            get_region_boxes(l, w, h, thresh, probs, boxes, 0);
+            get_region_boxes(l, w, h, thresh, probs, boxes, 0, map);
             if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
             if (coco){
                 print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
             } else if (imagenet){
-                print_imagenet_detections(fp, i+t-nthreads+1 + 9741, boxes, probs, l.w*l.h*l.n, 200, w, h, map);
+                print_imagenet_detections(fp, i+t-nthreads+1, boxes, probs, l.w*l.h*l.n, classes, w, h);
             } else {
                 print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h);
             }
@@ -345,7 +348,7 @@
         }
     }
     for(j = 0; j < classes; ++j){
-        fclose(fps[j]);
+        if(fps) fclose(fps[j]);
     }
     if(coco){
         fseek(fp, -2, SEEK_CUR); 
@@ -394,7 +397,7 @@
         image sized = resize_image(orig, net.w, net.h);
         char *id = basecfg(path);
         network_predict(net, sized.data);
-        get_region_boxes(l, 1, 1, thresh, probs, boxes, 1);
+        get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0);
         if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);
 
         char labelpath[4096];
@@ -473,7 +476,7 @@
         time=clock();
         network_predict(net, X);
         printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
-        get_region_boxes(l, 1, 1, thresh, probs, boxes, 0);
+        get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0);
         if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
         draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
         save_image(im, "predictions");
diff --git a/src/region_layer.c b/src/region_layer.c
index 902778c..9095b3c 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -196,7 +196,8 @@
                 if(truth.x > 100000 && truth.y > 100000){
                     for(n = 0; n < l.n*l.w*l.h; ++n){
                         int index = size*n + b*l.outputs + 5;
-                        float p = get_hierarchy_probability(l.output + index, l.softmax_tree, class);
+                        float scale =  l.output[index-1];
+                        float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class);
                         if(p > maxp){
                             maxp = p;
                             maxi = n;
@@ -324,7 +325,7 @@
     axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
 }
 
-void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
+void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map)
 {
     int i,j,n;
     float *predictions = l.output;
@@ -348,8 +349,13 @@
 
                 hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);
                 int found = 0;
-                for(j = l.classes - 1; j >= 0; --j){
-                    if(1){
+                if(map){
+                    for(j = 0; j < 200; ++j){
+                        float prob = scale*predictions[class_index+map[j]];
+                        probs[index][j] = (prob > thresh) ? prob : 0;
+                    }
+                } else {
+                    for(j = l.classes - 1; j >= 0; --j){
                         if(!found && predictions[class_index + j] > .5){
                             found = 1;
                         } else {
@@ -357,12 +363,9 @@
                         }
                         float prob = predictions[class_index+j];
                         probs[index][j] = (scale > thresh) ? prob : 0;
-                    }else{
-                        float prob = scale*predictions[class_index+j];
-                        probs[index][j] = (prob > thresh) ? prob : 0;
                     }
                 }
-            }else{
+            } else {
                 for(j = 0; j < l.classes; ++j){
                     float prob = scale*predictions[class_index+j];
                     probs[index][j] = (prob > thresh) ? prob : 0;
diff --git a/src/region_layer.h b/src/region_layer.h
index 3d04d66..a8cdd93 100644
--- a/src/region_layer.h
+++ b/src/region_layer.h
@@ -9,7 +9,7 @@
 region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
 void forward_region_layer(const region_layer l, network_state state);
 void backward_region_layer(const region_layer l, network_state state);
-void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
+void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map);
 void resize_region_layer(layer *l, int w, int h);
 
 #ifdef GPU

--
Gitblit v1.10.0