From a2f3644e87c11c222f86aa2aeb0e31d858ebb024 Mon Sep 17 00:00:00 2001
From: Alexey <AlexeyAB@users.noreply.github.com>
Date: Sat, 21 Apr 2018 20:56:46 +0000
Subject: [PATCH] Merge pull request #675 from IlyaOvodov/Flip_Fix

---
 src/network.h    |    1 +
 src/parser.c     |    1 +
 src/data.c       |   31 ++++++++++++++++---------------
 src/detector.c   |    1 +
 src/classifier.c |    2 ++
 src/data.h       |    9 +++++----
 6 files changed, 26 insertions(+), 19 deletions(-)

diff --git a/src/classifier.c b/src/classifier.c
index d42a917..e45f5a1 100644
--- a/src/classifier.c
+++ b/src/classifier.c
@@ -87,6 +87,7 @@
 
     args.min = net.min_crop;
     args.max = net.max_crop;
+    args.flip = net.flip;
     args.angle = net.angle;
     args.aspect = net.aspect;
     args.exposure = net.exposure;
@@ -193,6 +194,7 @@
 
    args.min = net.min_crop;
    args.max = net.max_crop;
+   args.flip = net.flip;
    args.angle = net.angle;
    args.aspect = net.aspect;
    args.exposure = net.exposure;
diff --git a/src/data.c b/src/data.c
index a9dba1d..3b014b4 100644
--- a/src/data.c
+++ b/src/data.c
@@ -104,7 +104,7 @@
     return X;
 }
 
-matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
+matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
 {
     int i;
     matrix X;
@@ -115,8 +115,9 @@
     for(i = 0; i < n; ++i){
         image im = load_image_color(paths[i], 0, 0);
         image crop = random_augment_image(im, angle, aspect, min, max, size);
-        int flip = random_gen()%2;
-        if (flip) flip_image(crop);
+        int flip = use_flip ? random_gen() % 2 : 0;
+        if (flip)
+            flip_image(crop);
         random_distort_image(crop, hue, saturation, exposure);
 
         /*
@@ -685,7 +686,7 @@
 
 #include "http_stream.h"
 
-data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure, int small_object)
+data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object)
 {
     char **random_paths = get_random_paths(paths, n, m);
     int i;
@@ -729,7 +730,7 @@
         float sx = (float)swidth  / ow;
         float sy = (float)sheight / oh;
 
-        int flip = random_gen()%2;
+        int flip = use_flip ? random_gen()%2 : 0;
 
         float dx = ((float)pleft/ow)/sx;
         float dy = ((float)ptop /oh)/sy;
@@ -752,7 +753,7 @@
     return d;
 }
 #else	// OPENCV
-data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure, int small_object)
+data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object)
 {
 	char **random_paths = get_random_paths(paths, n, m);
 	int i;
@@ -784,7 +785,7 @@
 		float sx = (float)swidth / ow;
 		float sy = (float)sheight / oh;
 
-		int flip = random_gen() % 2;
+		int flip = use_flip ? random_gen() % 2 : 0;
 		image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
 
 		float dx = ((float)pleft / ow) / sx;
@@ -817,7 +818,7 @@
     if (a.type == OLD_CLASSIFICATION_DATA){
         *a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
     } else if (a.type == CLASSIFICATION_DATA){
-        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
+        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
     } else if (a.type == SUPER_DATA){
         *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
     } else if (a.type == WRITING_DATA){
@@ -825,7 +826,7 @@
     } else if (a.type == REGION_DATA){
         *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
     } else if (a.type == DETECTION_DATA){
-        *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure, a.small_object);
+        *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.flip, a.jitter, a.hue, a.saturation, a.exposure, a.small_object);
     } else if (a.type == SWAG_DATA){
         *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
     } else if (a.type == COMPARE_DATA){
@@ -837,7 +838,7 @@
 		*(a.im) = load_image_color(a.path, 0, 0);
 		*(a.resized) = letterbox_image(*(a.im), a.w, a.h);
     } else if (a.type == TAG_DATA){
-        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
+        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.flip, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
     }
     free(ptr);
     return 0;
@@ -924,7 +925,7 @@
    d.indexes = calloc(n, sizeof(int));
    if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
    d.shallow = 0;
-   d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
+   d.X = load_image_augment_paths(paths, n, flip, min, max, size, angle, aspect, hue, saturation, exposure);
    d.y = load_labels_paths(paths, n, labels, k);
    if(m) free(paths);
    return d;
@@ -961,25 +962,25 @@
     return d;
 }
 
-data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
+data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
 {
     if(m) paths = get_random_paths(paths, n, m);
     data d = {0};
     d.shallow = 0;
-    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
+    d.X = load_image_augment_paths(paths, n, use_flip, min, max, size, angle, aspect, hue, saturation, exposure);
     d.y = load_labels_paths(paths, n, labels, k, hierarchy);
     if(m) free(paths);
     return d;
 }
 
-data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
+data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
 {
     if(m) paths = get_random_paths(paths, n, m);
     data d = {0};
     d.w = size;
     d.h = size;
     d.shallow = 0;
-    d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
+    d.X = load_image_augment_paths(paths, n, use_flip, min, max, size, angle, aspect, hue, saturation, exposure);
     d.y = load_tags_paths(paths, n, k);
     if(m) free(paths);
     return d;
diff --git a/src/data.h b/src/data.h
index e50ad00..57f4702 100644
--- a/src/data.h
+++ b/src/data.h
@@ -55,6 +55,7 @@
     int scale;
 	int small_object;
     float jitter;
+    int flip;
     float angle;
     float aspect;
     float saturation;
@@ -83,11 +84,11 @@
 data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
 data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
 data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
-data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure, int small_object);
-data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
-matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
+data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object);
+data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
+matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
 data load_data_super(char **paths, int n, int m, int w, int h, int scale);
-data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
+data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
 data load_go(char *filename);
 
 box_label *read_boxes(char *filename, int *n);
diff --git a/src/detector.c b/src/detector.c
index 86f6afc..ea5b49d 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -86,6 +86,7 @@
     args.n = imgs;
     args.m = plist->size;
     args.classes = classes;
+    args.flip = net.flip;
     args.jitter = jitter;
     args.num_boxes = l.max_boxes;
 	args.small_object = net.small_object;
diff --git a/src/network.h b/src/network.h
index b609546..fe160a9 100644
--- a/src/network.h
+++ b/src/network.h
@@ -52,6 +52,7 @@
     int h, w, c;
     int max_crop;
     int min_crop;
+    int flip; // horizontal flip 50% probability augmentaiont for classifier training (default = 1)
     float angle;
     float aspect;
     float exposure;
diff --git a/src/parser.c b/src/parser.c
index 568e540..7441ae2 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -622,6 +622,7 @@
     net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
     net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
     net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
+	net->flip = option_find_int_quiet(options, "flip", 1);
 
 	net->small_object = option_find_int_quiet(options, "small_object", 0);
     net->angle = option_find_float_quiet(options, "angle", 0);

--
Gitblit v1.10.0