From 8f1f5cbf8321b6b313d8f455d596290e7b8bb3f7 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Sat, 24 Mar 2018 20:35:05 +0000
Subject: [PATCH] 3.5x faster data augmentation using OpenCV for training Yolo on DGX and multi-GPU

---
 src/http_stream.h   |    6 ++
 src/data.c          |  100 +++++++++++++++++++++++++++++---
 src/http_stream.cpp |   62 +++++++++++++++++++-
 3 files changed, 154 insertions(+), 14 deletions(-)

diff --git a/src/data.c b/src/data.c
index 379756a..d9cedf5 100644
--- a/src/data.c
+++ b/src/data.c
@@ -668,6 +668,17 @@
     return d;
 }
 
+#ifdef OPENCV
+#include "opencv2/highgui/highgui_c.h"
+#include "opencv2/imgproc/imgproc_c.h"
+#include "opencv2/core/version.hpp"
+#ifndef CV_VERSION_EPOCH
+#include "opencv2/videoio/videoio_c.h"
+#include "opencv2/imgcodecs/imgcodecs_c.h"
+#endif
+
+#include "http_stream.h"
+
 data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure, int small_object)
 {
     char **random_paths = get_random_paths(paths, n, m);
@@ -681,10 +692,22 @@
 
     d.y = make_matrix(n, 5*boxes);
     for(i = 0; i < n; ++i){
-        image orig = load_image_color(random_paths[i], 0, 0);
+		const char *filename = random_paths[i];
 
-        int oh = orig.h;
-        int ow = orig.w;
+		int flag = 1;
+		IplImage *src;
+		if ((src = cvLoadImage(filename, flag)) == 0)
+		{
+			// Unreadable image: report it, append the path to bad.list and skip the sample. stdio is used instead of
+			// sprintf()+system("echo ..."): that overflowed the 256-byte buffer on long paths and exposed the path to the shell.
+			fprintf(stderr, "Cannot load image \"%s\"\n", filename);
+			FILE *bad_list = fopen("bad.list", "a");
+			if (bad_list) { fprintf(bad_list, "%s\n", filename); fclose(bad_list); }
+			continue;	// NOTE(review): d.X.vals[i] is never assigned for a skipped sample - confirm downstream code copes
+		}
+
+		int oh = src->height;
+		int ow = src->width;
 
         int dw = (ow*jitter);
         int dh = (oh*jitter);
@@ -701,25 +724,80 @@
         float sy = (float)sheight / oh;
 
         int flip = random_gen()%2;
-        image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
 
         float dx = ((float)pleft/ow)/sx;
         float dy = ((float)ptop /oh)/sy;
 
-        image sized = resize_image(cropped, w, h);
-        if(flip) flip_image(sized);
-        random_distort_image(sized, hue, saturation, exposure);
-        d.X.vals[i] = sized.data;
+		float dhue = rand_uniform_strong(-hue, hue);
+		float dsat = rand_scale(saturation);
+		float dexp = rand_scale(exposure);
 
-        fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy, small_object);
+		image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, jitter, dhue, dsat, dexp);
+		d.X.vals[i] = ai.data;
+		
+		//show_image(ai, "aug");
+		//cvWaitKey(0);
 
-        free_image(orig);
-        free_image(cropped);
+        fill_truth_detection(filename, boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy, small_object);
+
+		cvReleaseImage(&src);
     }
     free(random_paths);
     return d;
 }
+#else	// OPENCV
+data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure, int small_object)
+{	// Variant compiled when OPENCV is not defined: fills d.X (n rows of h*w*3 floats) and d.y, one row per sample.
+	char **random_paths = get_random_paths(paths, n, m);	// n entries; presumably sampled from the m input paths
+	int i;
+	data d = { 0 };
+	d.shallow = 0;
 
+	d.X.rows = n;
+	d.X.vals = calloc(d.X.rows, sizeof(float*));	// row pointers only; each row is assigned inside the loop
+	d.X.cols = h*w * 3;
+	// Label matrix with 5*boxes columns per sample (presumably 5 values per box); rows are written by fill_truth_detection().
+	d.y = make_matrix(n, 5 * boxes);
+	for (i = 0; i < n; ++i) {
+		image orig = load_image_color(random_paths[i], 0, 0);
+		// Original size, used to scale the jitter and the crop.
+		int oh = orig.h;
+		int ow = orig.w;
+		// Largest shift of a crop edge: the jitter fraction of the image size, truncated to int.
+		int dw = (ow*jitter);
+		int dh = (oh*jitter);
+		// One random offset per edge from rand_uniform_strong(-d, d); a negative offset pushes that edge outside the image.
+		int pleft = rand_uniform_strong(-dw, dw);
+		int pright = rand_uniform_strong(-dw, dw);
+		int ptop = rand_uniform_strong(-dh, dh);
+		int pbot = rand_uniform_strong(-dh, dh);
+		// Crop size after moving all four edges.
+		int swidth = ow - pleft - pright;
+		int sheight = oh - ptop - pbot;
+		// Crop size as a fraction of the original size.
+		float sx = (float)swidth / ow;
+		float sy = (float)sheight / oh;
+		// The flip is decided here, applied after resizing, and also passed to fill_truth_detection().
+		int flip = random_gen() % 2;
+		image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
+		// Crop origin in units of the crop size (equals pleft/swidth and ptop/sheight), forwarded for the labels.
+		float dx = ((float)pleft / ow) / sx;
+		float dy = ((float)ptop / oh) / sy;
+		// Resize the crop to w x h, then flip and color-distort it (presumably in place on sized's buffer).
+		image sized = resize_image(cropped, w, h);
+		if (flip) flip_image(sized);
+		random_distort_image(sized, hue, saturation, exposure);
+		d.X.vals[i] = sized.data;
+		// The row now holds sized.data (sized is not freed here); the labels receive the same flip, offset and scale.
+		fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1. / sx, 1. / sy, small_object);
+		// Only the intermediate images are released.
+		free_image(orig);
+		free_image(cropped);
+	}
+	free(random_paths);
+	return d;
+}
+#endif	// OPENCV
 
 void *load_thread(void *ptr)
 {
diff --git a/src/http_stream.cpp b/src/http_stream.cpp
index ca57728..5ae8f78 100644
--- a/src/http_stream.cpp
+++ b/src/http_stream.cpp
@@ -47,6 +47,7 @@
 using namespace cv;
 
 #include "http_stream.h"
+#include "image.h"
 
 
 class MJPGWriter
@@ -185,8 +186,7 @@
 		return true;
 	}
 };
-
-
+// ----------------------------------------
 
 void send_mjpeg(IplImage* ipl, int port, int timeout, int quality) {
 	static MJPGWriter wri(port, timeout, quality);
@@ -194,7 +194,7 @@
 	wri.write(mat);
 	std::cout << " MJPEG-stream sent. \n";
 }
-
+// ----------------------------------------
 
 CvCapture* get_capture_webcam(int index) {
 	CvCapture* cap = NULL;
@@ -208,6 +208,7 @@
 	}
 	return cap;
 }
+// ----------------------------------------
 
 IplImage* get_webcam_frame(CvCapture *cap) {
 	IplImage* src = NULL;
@@ -225,6 +226,61 @@
 	}
 	return src;
 }
+// ----------------------------------------
+extern "C" {
+	image ipl_to_image(IplImage* src);	// implemented in image.c; declared here with C linkage
+}
+
+// Crops ipl (zero-padding outside the image), resizes to w x h, optionally mirrors and HSV-distorts it.
+// pleft/ptop/swidth/sheight: crop rectangle in source pixels; flip: non-zero mirrors around the y-axis;
+// dhue: hue shift, scaled by 179 into 8-bit hue units; dsat/dexp: multipliers for S and V; jitter: unused here.
+image image_data_augmentation(IplImage* ipl, int w, int h,
+	int pleft, int ptop, int swidth, int sheight, int flip,
+	float jitter, float dhue, float dsat, float dexp)
+{
+	cv::Mat img = cv::cvarrToMat(ipl);
+
+	// crop: copy the part of the requested rectangle that lies inside the image onto a black canvas
+	cv::Rect src_rect(pleft, ptop, swidth, sheight);
+	cv::Rect img_rect(cv::Point2i(0, 0), img.size());
+	cv::Rect new_src_rect = src_rect & img_rect;
+	cv::Rect dst_rect(cv::Point2i(std::max(0, -pleft), std::max(0, -ptop)), new_src_rect.size());
+
+	cv::Mat cropped(cv::Size(src_rect.width, src_rect.height), img.type());
+	cropped.setTo(cv::Scalar::all(0));
+	img(new_src_rect).copyTo(cropped(dst_rect));
+
+	// resize
+	cv::Mat sized;
+	cv::resize(cropped, sized, cv::Size(w, h), 0, 0, INTER_LINEAR);
+
+	// flip: share the flipped buffer instead of clone()-ing it, which would deep-copy the image a second time
+	if (flip) {
+		cv::flip(sized, cropped, 1);	// 0 - x-axis, 1 - y-axis, -1 - both axes (x & y)
+		sized = cropped;
+	}
+
+	// HSV augmentation
+	cv::Mat hsv_src;
+	cvtColor(sized, hsv_src, CV_BGR2HSV);
+	std::vector<cv::Mat> hsv;
+	cv::split(hsv_src, hsv);
+	hsv[1] *= dsat;
+	hsv[2] *= dexp;
+
+	// 8-bit hue is circular in [0,180): wrap the shift through a lookup table, because a saturating "+="
+	// clamps negative shifts to 0 instead of wrapping around
+	const int hue_shift = cvRound(179 * dhue);
+	cv::Mat hue_lut(1, 256, CV_8UC1), hue_wrapped;
+	for (int v = 0; v < 256; ++v) hue_lut.at<uchar>(0, v) = static_cast<uchar>(((v + hue_shift) % 180 + 180) % 180);
+	cv::LUT(hsv[0], hue_lut, hue_wrapped);
+	hsv[0] = hue_wrapped;
+	cv::merge(hsv, hsv_src);
+	cvtColor(hsv_src, sized, CV_HSV2RGB);	// channels come out in RGB order instead of the input's BGR
+
+	IplImage src = sized;	// Mat -> IplImage -> image
+	return ipl_to_image(&src);
+}
 
 
 #endif	// OPENCV
\ No newline at end of file
diff --git a/src/http_stream.h b/src/http_stream.h
index ca7e3ae..b1daf9f 100644
--- a/src/http_stream.h
+++ b/src/http_stream.h
@@ -5,11 +5,17 @@
 #ifdef __cplusplus
 extern "C" {
 #endif
+#include "image.h"
 
 void send_mjpeg(IplImage* ipl, int port, int timeout, int quality);
 CvCapture* get_capture_webcam(int index);
 IplImage* get_webcam_frame(CvCapture *cap);
 
+// Crops, resizes to w x h, optionally flips and HSV-distorts an OpenCV image; returns the result as an `image`.
+image image_data_augmentation(IplImage* ipl, int w, int h,
+	int pleft, int ptop, int swidth, int sheight, int flip,
+	float jitter, float dhue, float dsat, float dexp);
+
 #ifdef __cplusplus
 }
 #endif

--
Gitblit v1.10.0