From aebe937710ced03d03f73ab23f410f29685655c1 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Thu, 11 Aug 2016 18:54:24 +0000
Subject: [PATCH] what do you even write here?

---
 src/image.c           |   69 +++++++++++------
 cfg/yolo.cfg          |   53 +++++++++++-
 src/utils.h           |    1 
 src/network.h         |    3 
 src/connected_layer.c |    2 
 src/parser.c          |    4 +
 src/data.c            |   30 ++++--
 src/classifier.c      |   21 +++--
 src/go.c              |    2 
 src/data.h            |    9 +
 src/image.h           |    2 
 src/utils.c           |    6 +
 12 files changed, 144 insertions(+), 58 deletions(-)

diff --git a/cfg/yolo.cfg b/cfg/yolo.cfg
index 1f69856..6876eff 100644
--- a/cfg/yolo.cfg
+++ b/cfg/yolo.cfg
@@ -1,19 +1,29 @@
 [net]
-batch=1
-subdivisions=1
+batch=64
+subdivisions=2
 height=448
 width=448
 channels=3
 momentum=0.9
 decay=0.0005
 
-learning_rate=0.001
+learning_rate=0.0005
 policy=steps
 steps=200,400,600,20000,30000
 scales=2.5,2,2,.1,.1
 max_batches = 40000
 
+[crop]
+crop_width=448
+crop_height=448
+flip=0
+angle=0
+saturation = 1.5
+exposure = 1.5
+noadjust=1
+
 [convolutional]
+batch_normalize=1
 filters=64
 size=7
 stride=2
@@ -25,6 +35,7 @@
 stride=2
 
 [convolutional]
+batch_normalize=1
 filters=192
 size=3
 stride=1
@@ -36,6 +47,7 @@
 stride=2
 
 [convolutional]
+batch_normalize=1
 filters=128
 size=1
 stride=1
@@ -43,6 +55,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=256
 size=3
 stride=1
@@ -50,6 +63,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=256
 size=1
 stride=1
@@ -57,6 +71,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=512
 size=3
 stride=1
@@ -68,6 +83,7 @@
 stride=2
 
 [convolutional]
+batch_normalize=1
 filters=256
 size=1
 stride=1
@@ -75,6 +91,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=512
 size=3
 stride=1
@@ -82,6 +99,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=256
 size=1
 stride=1
@@ -89,6 +107,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=512
 size=3
 stride=1
@@ -96,6 +115,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=256
 size=1
 stride=1
@@ -103,6 +123,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=512
 size=3
 stride=1
@@ -110,6 +131,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=256
 size=1
 stride=1
@@ -117,6 +139,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=512
 size=3
 stride=1
@@ -124,6 +147,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=512
 size=1
 stride=1
@@ -131,6 +155,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=1024
 size=3
 stride=1
@@ -142,6 +167,7 @@
 stride=2
 
 [convolutional]
+batch_normalize=1
 filters=512
 size=1
 stride=1
@@ -149,6 +175,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=1024
 size=3
 stride=1
@@ -156,6 +183,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=512
 size=1
 stride=1
@@ -163,6 +191,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 filters=1024
 size=3
 stride=1
@@ -172,6 +201,7 @@
 #######
 
 [convolutional]
+batch_normalize=1
 size=3
 stride=1
 pad=1
@@ -179,6 +209,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 size=3
 stride=2
 pad=1
@@ -186,6 +217,7 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 size=3
 stride=1
 pad=1
@@ -193,18 +225,25 @@
 activation=leaky
 
 [convolutional]
+batch_normalize=1
 size=3
 stride=1
 pad=1
 filters=1024
 activation=leaky
 
-[connected]
-output=4096
+[local]
+size=3
+stride=1
+pad=1
+filters=256
 activation=leaky
 
+[dropout]
+probability=.5
+
 [connected]
-output= 1470
+output= 1715
 activation=linear
 
 [detection]
@@ -212,7 +251,7 @@
 coords=4
 rescore=1
 side=7
-num=2
+num=3
 softmax=0
 sqrt=1
 jitter=.2
diff --git a/src/classifier.c b/src/classifier.c
index 608e3ab..ee6d212 100644
--- a/src/classifier.c
+++ b/src/classifier.c
@@ -41,7 +41,7 @@
 
 void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
 {
-    int nthreads = 2;
+    int nthreads = 8;
     int i;
 
     data_seed = time(0);
@@ -82,6 +82,9 @@
 
     args.min = net.min_crop;
     args.max = net.max_crop;
+    args.angle = net.angle;
+    args.exposure = net.exposure;
+    args.saturation = net.saturation;
     args.size = net.w;
 
     args.paths = paths;
@@ -113,14 +116,14 @@
         printf("Loaded: %lf seconds\n", sec(clock()-time));
         time=clock();
 
-        /*
-           int u;
-           for(u = 0; u < net.batch; ++u){
-           image im = float_to_image(net.w, net.h, 3, train.X.vals[u]);
-           show_image(im, "loaded");
-           cvWaitKey(0);
-           }
-         */
+        if(0){
+            int u;
+            for(u = 0; u < imgs; ++u){
+                image im = float_to_image(net.w, net.h, 3, train.X.vals[u]);
+                show_image(im, "loaded");
+                cvWaitKey(0);
+            }
+        }
 
         float loss = train_network(net, train);
         if(avg_loss == -1) avg_loss = loss;
diff --git a/src/connected_layer.c b/src/connected_layer.c
index e7784ea..623e6c8 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -265,7 +265,7 @@
 void backward_connected_layer_gpu(connected_layer l, network_state state)
 {
     int i;
-    constrain_ongpu(l.outputs*l.batch, 5, l.delta_gpu, 1);
+    constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
     gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
     for(i = 0; i < l.batch; ++i){
         axpy_ongpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1);
diff --git a/src/data.c b/src/data.c
index 231fb93..9aa08af 100644
--- a/src/data.c
+++ b/src/data.c
@@ -100,7 +100,7 @@
     return X;
 }
 
-matrix load_image_cropped_paths(char **paths, int n, int min, int max, int size)
+matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float exposure, float saturation)
 {
     int i;
     matrix X;
@@ -110,9 +110,14 @@
 
     for(i = 0; i < n; ++i){
         image im = load_image_color(paths[i], 0, 0);
-        image crop = random_resize_crop_image(im, min, max, size);
+        image crop = random_augment_image(im, angle, min, max, size);
         int flip = rand_r(&data_seed)%2;
         if (flip) flip_image(crop);
+        float exp = rand_uniform(1./exposure, exposure); // random exposure gain drawn between 1/exposure and exposure
+        float sat = rand_uniform(1./saturation, saturation); // random saturation gain drawn between 1/saturation and saturation
+        exposure_image(crop, exp);
+        saturate_image(crop, sat); // apply the saturation draw to saturation, not to exposure a second time
+
         /*
         show_image(im, "orig");
         show_image(crop, "crop");
@@ -668,14 +673,17 @@
 
     //printf("Loading data: %d\n", rand_r(&data_seed));
     load_args a = *(struct load_args*)ptr;
+    if(a.exposure == 0) a.exposure = 1;
+    if(a.saturation == 0) a.saturation = 1;
+
     if (a.type == OLD_CLASSIFICATION_DATA){
         *a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
     } else if (a.type == CLASSIFICATION_DATA){
-        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size);
+        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
     } else if (a.type == SUPER_DATA){
         *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
     } else if (a.type == STUDY_DATA){
-        *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size);
+        *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
     } else if (a.type == WRITING_DATA){
         *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
     } else if (a.type == REGION_DATA){
@@ -690,7 +698,7 @@
         *(a.im) = load_image_color(a.path, 0, 0);
         *(a.resized) = resize_image(*(a.im), a.w, a.h);
     } else if (a.type == TAG_DATA){
-        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size);
+        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
         //*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
     }
     free(ptr);
@@ -732,13 +740,13 @@
     return d;
 }
 
-data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size)
+data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
 {
     data d = {0};
     d.indexes = calloc(n, sizeof(int));
     if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
     d.shallow = 0;
-    d.X = load_image_cropped_paths(paths, n, min, max, size);
+    d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation); // forwards the new augmentation parameters to the image loader
     d.y = load_labels_paths(paths, n, labels, k);
     if(m) free(paths);
     return d;
 }
@@ -774,25 +782,25 @@
     return d;
 }
 
-data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size)
+data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
 {
     if(m) paths = get_random_paths(paths, n, m);
     data d = {0};
     d.shallow = 0;
-    d.X = load_image_cropped_paths(paths, n, min, max, size);
+    d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation); // forwards the new augmentation parameters to the image loader
     d.y = load_labels_paths(paths, n, labels, k);
     if(m) free(paths);
     return d;
 }
 
-data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size)
+data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation)
 {
     if(m) paths = get_random_paths(paths, n, m);
     data d = {0};
     d.w = size;
     d.h = size;
     d.shallow = 0;
-    d.X = load_image_cropped_paths(paths, n, min, max, size);
+    d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation); // forwards the new augmentation parameters to the image loader
     d.y = load_tags_paths(paths, n, k);
     if(m) free(paths);
     return d;
 }
diff --git a/src/data.h b/src/data.h
index 75123a5..1220be0 100644
--- a/src/data.h
+++ b/src/data.h
@@ -51,6 +51,9 @@
     int background;
     int scale;
     float jitter;
+    float angle;
+    float saturation;
+    float exposure;
     data *d;
     image *im;
     image *resized;
@@ -72,10 +75,10 @@
 data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
 data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
 data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter);
-data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size);
-data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size);
+data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation);
+data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
 data load_data_super(char **paths, int n, int m, int w, int h, int scale);
-data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size);
+data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
 data load_go(char *filename);
 
 box_label *read_boxes(char *filename, int *n);
diff --git a/src/go.c b/src/go.c
index 91beaf1..bb5e60e 100644
--- a/src/go.c
+++ b/src/go.c
@@ -132,7 +132,7 @@
     char buff[256];
     float *board = calloc(19*19*net.batch, sizeof(float));
     float *move = calloc(19*19*net.batch, sizeof(float));
-    moves m = load_go_moves("/home/pjreddie/go.train");
+    moves m = load_go_moves("/home/pjreddie/backup/go.train");
     //moves m = load_go_moves("games.txt");
 
     int N = m.n;
diff --git a/src/image.c b/src/image.c
index fd890d0..fe63b34 100644
--- a/src/image.c
+++ b/src/image.c
@@ -459,6 +459,25 @@
         return out;
     }
 
+    image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy) // builds a w x h image sampled from im: scale s, offset (dx,dy), rotation rad about im's center
+    {
+        int x, y, c;
+        float cx = im.w/2.; // rotation pivot: horizontal center of the source image
+        float cy = im.h/2.; // rotation pivot: vertical center of the source image
+        image rot = make_image(w, h, im.c); // output has the source's channel count and is returned to the caller
+        for(c = 0; c < im.c; ++c){
+            for(y = 0; y < h; ++y){
+                for(x = 0; x < w; ++x){
+                    float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx; // output (x,y) -> source x: undo scale and offset, then rotate about (cx,cy)
+                    float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy; // same mapping for the source y coordinate
+                    float val = bilinear_interpolate(im, rx, ry, c); // sample the source at the fractional position (rx,ry)
+                    set_pixel(rot, x, y, c, val);
+                }
+            }
+        }
+        return rot;
+    }
+
     image rotate_image(image im, float rad)
     {
         int x, y, c;
@@ -603,15 +622,19 @@
     return crop;
 }
 
-image random_resize_crop_image(image im, int low, int high, int size)
+image random_augment_image(image im, float angle, int low, int high, int size) // random scale, rotation and size x size crop done by a single rotate_crop_image call
 {
     int r = rand_int(low, high);
-    image resized = resize_min(im, r);
-    int dx = rand_int(0, resized.w - size);
-    int dy = rand_int(0, resized.h - size);
-    image crop = crop_image(resized, dx, dy, size, size);
+    int min = (im.h < im.w) ? im.h : im.w; // shorter side of the source image
+    float scale = (float)r / min; // factor that maps the shorter side to r pixels
 
-    if(resized.data != im.data) free_image(resized);
+    float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; // angle is given in degrees; converted to radians here
+    int dx = rand_int(0, scale * im.w - size); // crop offset measured in the scaled image's width
+    int dy = rand_int(0, scale * im.h - size); // crop offset measured in the scaled image's height
+    //printf("%d %d\n", dx, dy);
+
+    image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy);
+
     return crop;
 }
 
@@ -794,23 +817,6 @@
     constrain_image(im);
 }
 
-/*
-   image saturate_image(image im, float sat)
-   {
-   image gray = grayscale_image(im);
-   image blend = blend_image(im, gray, sat);
-   free_image(gray);
-   constrain_image(blend);
-   return blend;
-   }
-
-   image brightness_image(image im, float b)
-   {
-   image bright = make_image(im.w, im.h, im.c);
-   return bright;
-   }
- */
-
 float bilinear_interpolate(image im, float x, float y, int c)
 {
     int ix = (int) floorf(x);
@@ -893,6 +899,7 @@
 
     image bin = binarize_image(im);
 
+/*
 #ifdef GPU
     image r = resize_image(im, im.w, im.h);
     image black = make_image(im.w*2 + 3, im.h*2 + 3, 9);
@@ -911,7 +918,16 @@
     show_image_layers(black, "Black");
     show_image(black2, "Recreate");
 #endif
+*/
+    image rot = rotate_crop_image(im, -.2618, 1, im.w/2, im.h/2, 0, 0);
+    image rot3 = rotate_crop_image(im, -.2618, 2, im.w, im.h, im.w/2, 0);
+    image rot2 = rotate_crop_image(im, -.2618, 1, im.w, im.h, 0, 0);
+    show_image(rot, "Rotated");
+    show_image(rot2, "base");
 
+    show_image(rot3, "Rotated2");
+
+/*
     show_image(im,   "Original");
     show_image(bin,  "Binary");
     show_image(gray, "Gray");
@@ -919,6 +935,7 @@
     show_image(sat5, "Saturation-.5");
     show_image(exp2, "Exposure-2");
     show_image(exp5, "Exposure-.5");
+    */
 #ifdef OPENCV
     cvWaitKey(0);
 #endif
@@ -1036,7 +1053,11 @@
 }
 float get_pixel_extend(image m, int x, int y, int c)
 {
-    if(x < 0 || x >= m.w || y < 0 || y >= m.h || c < 0 || c >= m.c) return 0;
+    if(x < 0) x = 0; // out-of-range coordinates are now clamped to the nearest edge pixel instead of reading as 0
+    if(x >= m.w) x = m.w-1;
+    if(y < 0) y = 0;
+    if(y >= m.h) y = m.h-1;
+    if(c < 0 || c >= m.c) return 0; // an invalid channel index still reads as 0
     return get_pixel(m, x, y, c);
 }
 void set_pixel(image m, int x, int y, int c, float val)
diff --git a/src/image.h b/src/image.h
index e4eecd5..9c53fd3 100644
--- a/src/image.h
+++ b/src/image.h
@@ -31,7 +31,7 @@
 void scale_image(image m, float s);
 image crop_image(image im, int dx, int dy, int w, int h);
 image random_crop_image(image im, int w, int h);
-image random_resize_crop_image(image im, int low, int high, int size);
+image random_augment_image(image im, float angle, int low, int high, int size);
 image resize_image(image im, int w, int h);
 image resize_min(image im, int min);
 void translate_image(image m, float s);
diff --git a/src/network.h b/src/network.h
index 4157387..3306962 100644
--- a/src/network.h
+++ b/src/network.h
@@ -40,6 +40,9 @@
     int h, w, c;
     int max_crop;
     int min_crop;
+    float angle;
+    float exposure;
+    float saturation;
 
     int gpu_index;
 
diff --git a/src/parser.c b/src/parser.c
index 503e7cf..904df1a 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -483,6 +483,10 @@
     net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
     net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
 
+    net->angle = option_find_float_quiet(options, "angle", 0);
+    net->saturation = option_find_float_quiet(options, "saturation", 1);
+    net->exposure = option_find_float_quiet(options, "exposure", 1);
+
     if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
 
     char *policy_s = option_find_str(options, "policy", "constant");
diff --git a/src/utils.c b/src/utils.c
index 7386305..41893fc 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -531,7 +531,6 @@
 }
 
 // From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
-#define TWO_PI 6.2831853071795864769252866
 float rand_normal()
 {
     static int haveSpare = 0;
@@ -578,6 +577,11 @@
 
 float rand_uniform(float min, float max)
 {
+    if(max < min){ // accept the bounds in either order, e.g. rand_uniform(1./e, e) when e < 1
+        float swap = min;
+        min = max;
+        max = swap;
+    }
     return ((float)rand()/RAND_MAX * (max - min)) + min;
 }
 
diff --git a/src/utils.h b/src/utils.h
index cba7f6f..ae0cba2 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -5,6 +5,7 @@
 #include "list.h"
 
 #define SECRET_NUM -1234
+#define TWO_PI 6.2831853071795864769252866
 
 void shuffle(void *arr, size_t n, size_t size);
 void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections);

--
Gitblit v1.10.0