From 6b38dcdce07b0c082334e7d9e10fe26bc440a347 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Thu, 08 Sep 2016 05:27:56 +0000
Subject: [PATCH] adding new tiny-yolo

---
 src/utils.h               |    1 
 src/network.h             |    1 
 src/connected_layer.c     |   19 ++++
 src/connected_layer.h     |    1 
 src/data.c                |   23 +++--
 src/classifier.c          |    1 
 src/region_layer.c        |    6 -
 src/data.h                |    9 +-
 src/image.c               |   29 +++++--
 src/activations.h         |    2 
 src/parser.c              |    1 
 src/detector.c            |    7 +
 src/activation_kernels.cu |    2 
 cfg/tiny-yolo.cfg         |   50 ++++--------
 src/darknet.c             |   35 ++++++++
 src/image.h               |    2 
 src/utils.c               |    7 +
 17 files changed, 132 insertions(+), 64 deletions(-)

diff --git a/cfg/yolo-tiny.cfg b/cfg/tiny-yolo.cfg
similarity index 72%
rename from cfg/yolo-tiny.cfg
rename to cfg/tiny-yolo.cfg
index c72ed9b..ac4b346 100644
--- a/cfg/yolo-tiny.cfg
+++ b/cfg/tiny-yolo.cfg
@@ -1,27 +1,24 @@
 [net]
 batch=64
-subdivisions=64
+subdivisions=2
 height=448
 width=448
 channels=3
 momentum=0.9
 decay=0.0005
 
-learning_rate=0.0001
+saturation=.75
+exposure=.75
+hue = .1
+
+learning_rate=0.0005
 policy=steps
-steps=20,40,60,80,20000,30000
-scales=5,5,2,2,.1,.1
+steps=200,400,600,800,20000,30000
+scales=2.5,2,2,2,.1,.1
 max_batches = 40000
 
-[crop]
-crop_width=448
-crop_height=448
-flip=0
-angle=0
-saturation = 1.5
-exposure = 1.5
-
 [convolutional]
+batch_normalize=1
 filters=16
 size=3
 stride=1
@@ -33,6 +30,7 @@
 stride=2
 
 [convolutional]
+batch_normalize=1
 filters=32
 size=3
 stride=1
@@ -44,6 +42,7 @@
 stride=2
 
 [convolutional]
+batch_normalize=1
 filters=64
 size=3
 stride=1
@@ -55,6 +54,7 @@
 stride=2
 
 [convolutional]
+batch_normalize=1
 filters=128
 size=3
 stride=1
@@ -66,6 +66,7 @@
 stride=2
 
 [convolutional]
+batch_normalize=1
 filters=256
 size=3
 stride=1
@@ -77,6 +78,7 @@
 stride=2
 
 [convolutional]
+batch_normalize=1
 filters=512
 size=3
 stride=1
@@ -88,37 +90,21 @@
 stride=2
 
 [convolutional]
-filters=1024
+batch_normalize=1
 size=3
 stride=1
 pad=1
+filters=1024
 activation=leaky
 
 [convolutional]
-filters=1024
+batch_normalize=1
 size=3
 stride=1
 pad=1
+filters=256
 activation=leaky
 
-[convolutional]
-filters=1024
-size=3
-stride=1
-pad=1
-activation=leaky
-
-[connected]
-output=256
-activation=linear
-
-[connected]
-output=4096
-activation=leaky
-
-[dropout]
-probability=.5
-
 [connected]
 output= 1470
 activation=linear
diff --git a/src/activation_kernels.cu b/src/activation_kernels.cu
index 5d61529..994e206 100644
--- a/src/activation_kernels.cu
+++ b/src/activation_kernels.cu
@@ -31,7 +31,7 @@
 __device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;}
 __device__ float relu_activate_kernel(float x){return x*(x>0);}
 __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
-__device__ float relie_activate_kernel(float x){return x*(x>0);}
+__device__ float relie_activate_kernel(float x){return (x>0) ? x : .01*x;}
 __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;}
 __device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;}
 __device__ float tanh_activate_kernel(float x){return (2/(1 + exp(-2*x)) - 1);}
diff --git a/src/activations.h b/src/activations.h
index d1b8c37..1c36ff5 100644
--- a/src/activations.h
+++ b/src/activations.h
@@ -36,7 +36,7 @@
 static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
 static inline float relu_activate(float x){return x*(x>0);}
 static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
-static inline float relie_activate(float x){return x*(x>0);}
+static inline float relie_activate(float x){return (x>0) ? x : .01*x;}
 static inline float ramp_activate(float x){return x*(x>0)+.1*x;}
 static inline float leaky_activate(float x){return (x>0) ? x : .1*x;}
 static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);}
diff --git a/src/classifier.c b/src/classifier.c
index e59f7ae..7ab70e2 100644
--- a/src/classifier.c
+++ b/src/classifier.c
@@ -95,6 +95,7 @@
     args.min = net.min_crop;
     args.max = net.max_crop;
     args.angle = net.angle;
+    args.aspect = net.aspect;
     args.exposure = net.exposure;
     args.saturation = net.saturation;
     args.hue = net.hue;
diff --git a/src/connected_layer.c b/src/connected_layer.c
index 623e6c8..b4ced2d 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -187,7 +187,7 @@
 {
     int i, j;
     for(i = 0; i < l.outputs; ++i){
-        float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001);
+        float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001);
         for(j = 0; j < l.inputs; ++j){
             l.weights[i*l.inputs + j] *= scale;
         }
@@ -198,6 +198,23 @@
     }
 }
 
+/* Print summary statistics for connected layer l's parameters; batch-norm terms only when enabled. */
+void statistics_connected_layer(layer l)
+{
+    if(l.batch_normalize){
+        printf("Scales ");
+        print_statistics(l.scales, l.outputs);
+        printf("Rolling Mean ");
+        print_statistics(l.rolling_mean, l.outputs);
+        printf("Rolling Variance ");
+        print_statistics(l.rolling_variance, l.outputs);
+    }
+    printf("Biases ");
+    print_statistics(l.biases, l.outputs);
+    printf("Weights ");
+    print_statistics(l.weights, l.inputs*l.outputs); /* whole outputs x inputs matrix, not just its first l.outputs entries */
+}
+
 #ifdef GPU
 
 void pull_connected_layer(connected_layer l)
diff --git a/src/connected_layer.h b/src/connected_layer.h
index affcaaf..23797b1 100644
--- a/src/connected_layer.h
+++ b/src/connected_layer.h
@@ -13,6 +13,7 @@
 void backward_connected_layer(connected_layer layer, network_state state);
 void update_connected_layer(connected_layer layer, int batch, float learning_rate, float momentum, float decay);
 void denormalize_connected_layer(layer l);
+void statistics_connected_layer(layer l);
 
 #ifdef GPU
 void forward_connected_layer_gpu(connected_layer layer, network_state state);
diff --git a/src/darknet.c b/src/darknet.c
index 263349e..128d231 100644
--- a/src/darknet.c
+++ b/src/darknet.c
@@ -254,6 +254,39 @@
     save_weights(net, outfile);
 }
 
+void statistics_net(char *cfgfile, char *weightfile)
+{   /* Parse cfgfile, optionally load weightfile, then print parameter statistics for batch-normalized CONNECTED and GRU layers. */
+    gpu_index = -1; /* NOTE(review): presumably forces CPU-only execution -- confirm */
+    network net = parse_network_cfg(cfgfile);
+    if (weightfile) { /* weights are optional: a NULL weightfile skips loading */
+        load_weights(&net, weightfile);
+    }
+    int i;
+    for (i = 0; i < net.n; ++i) {
+        layer l = net.layers[i];
+        if (l.type == CONNECTED && l.batch_normalize) {
+            printf("Connected Layer %d\n", i);
+            statistics_connected_layer(l);
+        }
+        if (l.type == GRU && l.batch_normalize) { /* a GRU layer reports each of its six sub-layers in turn */
+            printf("GRU Layer %d\n", i);
+            printf("Input Z\n");
+            statistics_connected_layer(*l.input_z_layer);
+            printf("Input R\n");
+            statistics_connected_layer(*l.input_r_layer);
+            printf("Input H\n");
+            statistics_connected_layer(*l.input_h_layer);
+            printf("State Z\n");
+            statistics_connected_layer(*l.state_z_layer);
+            printf("State R\n");
+            statistics_connected_layer(*l.state_r_layer);
+            printf("State H\n");
+            statistics_connected_layer(*l.state_h_layer);
+        }
+        printf("\n"); /* emitted for every layer, including the ones skipped above */
+    }
+}
+
 void denormalize_net(char *cfgfile, char *weightfile, char *outfile)
 {
     gpu_index = -1;
@@ -374,6 +407,8 @@
         reset_normalize_net(argv[2], argv[3], argv[4]);
     } else if (0 == strcmp(argv[1], "denormalize")){
         denormalize_net(argv[2], argv[3], argv[4]);
+    } else if (0 == strcmp(argv[1], "statistics")){
+        statistics_net(argv[2], argv[3]);
     } else if (0 == strcmp(argv[1], "normalize")){
         normalize_net(argv[2], argv[3], argv[4]);
     } else if (0 == strcmp(argv[1], "rescale")){
diff --git a/src/data.c b/src/data.c
index 09872e5..02dbac4 100644
--- a/src/data.c
+++ b/src/data.c
@@ -100,7 +100,7 @@
     return X;
 }
 
-matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure)
+matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
 {
     int i;
     matrix X;
@@ -110,7 +110,7 @@
 
     for(i = 0; i < n; ++i){
         image im = load_image_color(paths[i], 0, 0);
-        image crop = random_augment_image(im, angle, min, max, size);
+        image crop = random_augment_image(im, angle, aspect, min, max, size);
         int flip = rand_r(&data_seed)%2;
         if (flip) flip_image(crop);
         random_distort_image(crop, hue, saturation, exposure);
@@ -676,15 +676,16 @@
     load_args a = *(struct load_args*)ptr;
     if(a.exposure == 0) a.exposure = 1;
     if(a.saturation == 0) a.saturation = 1;
+    if(a.aspect == 0) a.aspect = 1;
 
     if (a.type == OLD_CLASSIFICATION_DATA){
         *a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
     } else if (a.type == CLASSIFICATION_DATA){
-        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
+        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
     } else if (a.type == SUPER_DATA){
         *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
     } else if (a.type == STUDY_DATA){
-        *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
+        *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
     } else if (a.type == WRITING_DATA){
         *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
     } else if (a.type == REGION_DATA){
@@ -699,7 +700,7 @@
         *(a.im) = load_image_color(a.path, 0, 0);
         *(a.resized) = resize_image(*(a.im), a.w, a.h);
     } else if (a.type == TAG_DATA){
-        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
+        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
         //*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
     }
     free(ptr);
@@ -741,13 +742,13 @@
     return d;
 }
 
-data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
+data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
 {
     data d = {0};
     d.indexes = calloc(n, sizeof(int));
     if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
     d.shallow = 0;
-    d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
+    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
     d.y = load_labels_paths(paths, n, labels, k);
     if(m) free(paths);
     return d;
@@ -783,25 +784,25 @@
     return d;
 }
 
-data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
+data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
 {
     if(m) paths = get_random_paths(paths, n, m);
     data d = {0};
     d.shallow = 0;
-    d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
+    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
     d.y = load_labels_paths(paths, n, labels, k);
     if(m) free(paths);
     return d;
 }
 
-data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
+data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
 {
     if(m) paths = get_random_paths(paths, n, m);
     data d = {0};
     d.w = size;
     d.h = size;
     d.shallow = 0;
-    d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
+    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
     d.y = load_tags_paths(paths, n, k);
     if(m) free(paths);
     return d;
diff --git a/src/data.h b/src/data.h
index c3e74cd..07c994b 100644
--- a/src/data.h
+++ b/src/data.h
@@ -52,6 +52,7 @@
     int scale;
     float jitter;
     float angle;
+    float aspect;
     float saturation;
     float exposure;
     float hue;
@@ -76,11 +77,11 @@
 data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
 data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
 data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
-data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
-matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure);
+data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
+matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
 data load_data_super(char **paths, int n, int m, int w, int h, int scale);
-data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
-data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
+data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
+data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
 data load_go(char *filename);
 
 box_label *read_boxes(char *filename, int *n);
diff --git a/src/detector.c b/src/detector.c
index becdd6c..39363e4 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -117,6 +117,10 @@
             int box_index = index * (classes + 5);
             boxes[index].x = (predictions[box_index + 0] + col + .5) / side * w;
             boxes[index].y = (predictions[box_index + 1] + row + .5) / side * h;
+            if(1){
+                boxes[index].x = (logistic_activate(predictions[box_index + 0]) + col) / side * w;
+                boxes[index].y = (logistic_activate(predictions[box_index + 1]) + row) / side * h;
+            }
             boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (square?2:1)) * w;
             boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (square?2:1)) * h;
             for(j = 0; j < classes; ++j){
@@ -237,6 +241,9 @@
             free_image(val_resized[t]);
         }
     }
+    for(j = 0; j < classes; ++j){
+        fclose(fps[j]);
+    }
     fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
 }
 
diff --git a/src/image.c b/src/image.c
index de896f2..21c2f8b 100644
--- a/src/image.c
+++ b/src/image.c
@@ -479,7 +479,8 @@
     return out;
 }
 
-image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
+
+image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect)
 {
     int x, y, c;
     float cx = im.w/2.;
@@ -488,8 +489,8 @@
     for(c = 0; c < im.c; ++c){
         for(y = 0; y < h; ++y){
             for(x = 0; x < w; ++x){
-                float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx;
-                float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy;
+                float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx;
+                float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy;
                 float val = bilinear_interpolate(im, rx, ry, c);
                 set_pixel(rot, x, y, c, val);
             }
@@ -642,18 +643,23 @@
     return crop;
 }
 
-image random_augment_image(image im, float angle, int low, int high, int size)
+image random_augment_image(image im, float angle, float aspect, int low, int high, int size)
 {
+    aspect = rand_scale(aspect);
     int r = rand_int(low, high);
-    int min = (im.h < im.w) ? im.h : im.w;
+    int min = (im.h < im.w*aspect) ? im.h : im.w*aspect;
     float scale = (float)r / min;
 
     float rad = rand_uniform(-angle, angle) * TWO_PI / 360.;
-    int dx = rand_int(0, scale * im.w - size);
-    int dy = rand_int(0, scale * im.h - size);
-    //printf("%d %d\n", dx, dy);
 
-    image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy);
+    float dx = (im.w*scale/aspect - size) / 2.;
+    float dy = (im.h*scale - size) / 2.;
+    if(dx < 0) dx = 0;
+    if(dy < 0) dy = 0;
+    dx = rand_uniform(-dx, dx);
+    dy = rand_uniform(-dy, dy);
+
+    image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy, aspect);
 
     return crop;
 }
@@ -971,6 +977,11 @@
     show_image(c4, "C4");
 #ifdef OPENCV
     while(1){
+        /* Argument order is (im, angle, aspect, low, high, size): no rotation, aspect jitter .75. */
+        image aug = random_augment_image(im, 0, .75, 320, 448, 320);
+        show_image(aug, "aug");
+        free_image(aug);
+
         float exposure = 1.15;
         float saturation = 1.15;
         float hue = .05;
diff --git a/src/image.h b/src/image.h
index 213c4b4..e124860 100644
--- a/src/image.h
+++ b/src/image.h
@@ -31,7 +31,7 @@
 void scale_image(image m, float s);
 image crop_image(image im, int dx, int dy, int w, int h);
 image random_crop_image(image im, int w, int h);
-image random_augment_image(image im, float angle, int low, int high, int size);
+image random_augment_image(image im, float angle, float aspect, int low, int high, int size);
 void random_distort_image(image im, float hue, float saturation, float exposure);
 image resize_image(image im, int w, int h);
 image resize_min(image im, int min);
diff --git a/src/network.h b/src/network.h
index b7ba39c..d850af8 100644
--- a/src/network.h
+++ b/src/network.h
@@ -41,6 +41,7 @@
     int max_crop;
     int min_crop;
     float angle;
+    float aspect;
     float exposure;
     float saturation;
     float hue;
diff --git a/src/parser.c b/src/parser.c
index 626f510..483c767 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -497,6 +497,7 @@
     net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
 
     net->angle = option_find_float_quiet(options, "angle", 0);
+    net->aspect = option_find_float_quiet(options, "aspect", 1);
     net->saturation = option_find_float_quiet(options, "saturation", 1);
     net->exposure = option_find_float_quiet(options, "exposure", 1);
     net->hue = option_find_float_quiet(options, "hue", 0);
diff --git a/src/region_layer.c b/src/region_layer.c
index 2185ea5..5fe37c5 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -80,8 +80,8 @@
         b.w = logistic_activate(x[index + 2]);
         b.h = logistic_activate(x[index + 3]);
     }
-    //if(adjust && b.w < .01) b.w = .01;
-    //if(adjust && b.h < .01) b.h = .01;
+    if(adjust && b.w < .01) b.w = .01;
+    if(adjust && b.h < .01) b.h = .01;
     return b;
 }
 
@@ -149,7 +149,6 @@
                     l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
                     if(best_iou > .5) l.delta[index + 4] = 0;
 
-                    /*
                     if(*(state.net.seen) < 6400){
                         box truth = {0};
                         truth.x = (i + .5)/l.w;
@@ -158,7 +157,6 @@
                         truth.h = .5;
                         delta_region_box(truth, l.output, index, i, j, l.w, l.h, l.delta, LOG, 1);
                     }
-                    */
                 }
             }
         }
diff --git a/src/utils.c b/src/utils.c
index a6cb421..55f64b8 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -414,6 +414,13 @@
     }
 }
 
+void print_statistics(float *a, int n)
+{   /* Print, on one line, the mse_array, mean_array and variance_array results for the first n entries of a. */
+    const float avg = mean_array(a, n);
+    const float var = variance_array(a, n);
+    printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), avg, var);
+}
+
 float variance_array(float *a, int n)
 {
     int i;
diff --git a/src/utils.h b/src/utils.h
index 3f0233a..185e5e3 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -57,6 +57,7 @@
 int find_arg(int argc, char* argv[], char *arg);
 char *find_char_arg(int argc, char **argv, char *arg, char *def);
 int sample_array(float *a, int n);
+void print_statistics(float *a, int n);
 
 #endif
 

--
Gitblit v1.10.0