~speedprog/mtg/mtg_card_detector.git

parent: 101de2b0 | patch | commit | ignore whitespace

3.5x times accelerated data augmentation using OpenCV for training Yolo on ...

AlexeyAB

2018-03-24 8f1f5cbf8321b6b313d8f455d596290e7b8bb3f7

3.5x faster data augmentation using OpenCV for training Yolo on DGX and multi-GPU

3 files modified

	src/data.c	100 ●●●●● patch \| view \| raw \| blame \| history
	src/http_stream.cpp	62 ●●●●● patch \| view \| raw \| blame \| history
	src/http_stream.h	6 ●●●●● patch \| view \| raw \| blame \| history

 src/data.c

@@ -668,6 +668,17 @@
    return d;
}

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#include "opencv2/core/version.hpp"
#ifndef CV_VERSION_EPOCH
#include "opencv2/videoio/videoio_c.h"
#include "opencv2/imgcodecs/imgcodecs_c.h"
#endif

#include "http_stream.h"

data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure, int small_object)
{
    char **random_paths = get_random_paths(paths, n, m);
@@ -681,10 +692,22 @@

    d.y = make_matrix(n, 5*boxes);
    for(i = 0; i < n; ++i){
        image orig = load_image_color(random_paths[i], 0, 0);
        const char *filename = random_paths[i];

        int oh = orig.h;
        int ow = orig.w;
        int flag = 1;
        IplImage *src;
        if ((src = cvLoadImage(filename, flag)) == 0)
        {
            fprintf(stderr, "Cannot load image \"%s\"\n", filename);
            char buff[256];
            sprintf(buff, "echo %s >> bad.list", filename);
            system(buff);
            continue;
            //exit(0);
        }

        int oh = src->height;
        int ow = src->width;

        int dw = (ow*jitter);
        int dh = (oh*jitter);
@@ -701,25 +724,80 @@
        float sy = (float)sheight / oh;

        int flip = random_gen()%2;
        image cropped = crop_image(orig, pleft, ptop, swidth, sheight);

        float dx = ((float)pleft/ow)/sx;
        float dy = ((float)ptop /oh)/sy;

        image sized = resize_image(cropped, w, h);
        if(flip) flip_image(sized);
        random_distort_image(sized, hue, saturation, exposure);
        d.X.vals[i] = sized.data;
        float dhue = rand_uniform_strong(-hue, hue);
        float dsat = rand_scale(saturation);
        float dexp = rand_scale(exposure);

        fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy, small_object);
        image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, jitter, dhue, dsat, dexp);
        d.X.vals[i] = ai.data;
		
        //show_image(ai, "aug");
        //cvWaitKey(0);

        free_image(orig);
        free_image(cropped);
        fill_truth_detection(filename, boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy, small_object);

        cvReleaseImage(&src);
    }
    free(random_paths);
    return d;
}
#else   // OPENCV
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure, int small_object)
{
    char **random_paths = get_random_paths(paths, n, m);
    int i;
    data d = { 0 };
    d.shallow = 0;

    d.X.rows = n;
    d.X.vals = calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w * 3;

    d.y = make_matrix(n, 5 * boxes);
    for (i = 0; i < n; ++i) {
        image orig = load_image_color(random_paths[i], 0, 0);

        int oh = orig.h;
        int ow = orig.w;

        int dw = (ow*jitter);
        int dh = (oh*jitter);

        int pleft = rand_uniform_strong(-dw, dw);
        int pright = rand_uniform_strong(-dw, dw);
        int ptop = rand_uniform_strong(-dh, dh);
        int pbot = rand_uniform_strong(-dh, dh);

        int swidth = ow - pleft - pright;
        int sheight = oh - ptop - pbot;

        float sx = (float)swidth / ow;
        float sy = (float)sheight / oh;

        int flip = random_gen() % 2;
        image cropped = crop_image(orig, pleft, ptop, swidth, sheight);

        float dx = ((float)pleft / ow) / sx;
        float dy = ((float)ptop / oh) / sy;

        image sized = resize_image(cropped, w, h);
        if (flip) flip_image(sized);
        random_distort_image(sized, hue, saturation, exposure);
        d.X.vals[i] = sized.data;

        fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1. / sx, 1. / sy, small_object);

        free_image(orig);
        free_image(cropped);
    }
    free(random_paths);
    return d;
}
#endif  // OPENCV

void *load_thread(void *ptr)
{

 src/http_stream.cpp

@@ -47,6 +47,7 @@
using namespace cv;

#include "http_stream.h"
#include "image.h"


class MJPGWriter
@@ -185,8 +186,7 @@
        return true;
    }
};


// ----------------------------------------

void send_mjpeg(IplImage* ipl, int port, int timeout, int quality) {
    static MJPGWriter wri(port, timeout, quality);
@@ -194,7 +194,7 @@
    wri.write(mat);
    std::cout << " MJPEG-stream sent. \n";
}

// ----------------------------------------

CvCapture* get_capture_webcam(int index) {
    CvCapture* cap = NULL;
@@ -208,6 +208,7 @@
    }
    return cap;
}
// ----------------------------------------

IplImage* get_webcam_frame(CvCapture *cap) {
    IplImage* src = NULL;
@@ -225,6 +226,61 @@
    }
    return src;
}
// ----------------------------------------
// C-linkage declaration of a function that takes an IplImage and returns an
// `image`. Declared here so this C++ file can call it; per the trailing note
// the definition lives in image.c.
extern "C" {
    image ipl_to_image(IplImage* src);  // image.c
}

image image_data_augmentation(IplImage* ipl, int w, int h,
    int pleft, int ptop, int swidth, int sheight, int flip,
    float jitter, float dhue, float dsat, float dexp)
{
    cv::Mat img = cv::cvarrToMat(ipl);

    // crop
    cv::Rect src_rect(pleft, ptop, swidth, sheight);
    cv::Rect img_rect(cv::Point2i(0, 0), img.size());
    cv::Rect new_src_rect = src_rect & img_rect;

    cv::Rect dst_rect(cv::Point2i(std::max(0, -pleft), std::max(0, -ptop)), new_src_rect.size());

    cv::Mat cropped(cv::Size(src_rect.width, src_rect.height), img.type());
    cropped.setTo(cv::Scalar::all(0));

    img(new_src_rect).copyTo(cropped(dst_rect));

    // resize
    cv::Mat sized;
    cv::resize(cropped, sized, cv::Size(w, h), 0, 0, INTER_LINEAR);

    // flip
    if (flip) {
        cv::flip(sized, cropped, 1);    // 0 - x-axis, 1 - y-axis, -1 - both axes (x & y)
        sized = cropped.clone();
    }

    // HSV augmentation
    // CV_BGR2HSV, CV_RGB2HSV, CV_HSV2BGR, CV_HSV2RGB
    cv::Mat hsv_src;
    cvtColor(sized, hsv_src, CV_BGR2HSV);   // also BGR -> RGB
	
    std::vector<cv::Mat> hsv;
    cv::split(hsv_src, hsv);

    hsv[1] *= dsat;
    hsv[2] *= dexp;
    hsv[0] += 179 * dhue;

    cv::merge(hsv, hsv_src);

    cvtColor(hsv_src, sized, CV_HSV2RGB);   // now RGB instead of BGR

    // Mat -> IplImage -> image
    IplImage src = sized;
    image out = ipl_to_image(&src);

    return out;
}


#endif  // OPENCV

 src/http_stream.h

@@ -5,11 +5,17 @@
#ifdef __cplusplus
extern "C" {
#endif
#include "image.h"

// Writes one frame to the MJPEG stream. The writer is a function-level static
// constructed from port/timeout/quality, so only the values passed on the
// first call configure it.
void send_mjpeg(IplImage* ipl, int port, int timeout, int quality);

// Returns a capture handle for the webcam with the given index.
// NOTE(review): the result starts out as NULL in the implementation; presumably
// NULL is returned when the device cannot be opened — confirm.
CvCapture* get_capture_webcam(int index);

// Returns a frame taken from the given capture.
// NOTE(review): the result starts out as NULL in the implementation; frame
// ownership is not visible here — confirm before releasing it.
IplImage* get_webcam_frame(CvCapture *cap);

// Crops the window (pleft, ptop, swidth, sheight) out of ipl, resizes it to
// w x h, mirrors it when flip is non-zero, scales saturation and value by
// dsat and dexp, shifts hue by dhue, and returns the result as an `image`.
// The jitter argument is not read by the implementation.
image image_data_augmentation(IplImage* ipl, int w, int h,
    int pleft, int ptop, int swidth, int sheight, int flip,
    float jitter, float dhue, float dsat, float dexp);

#ifdef __cplusplus
}
#endif

			@@ -668,6 +668,17 @@
			return d;
			}

			#ifdef OPENCV
			#include "opencv2/highgui/highgui_c.h"
			#include "opencv2/imgproc/imgproc_c.h"
			#include "opencv2/core/version.hpp"
			#ifndef CV_VERSION_EPOCH
			#include "opencv2/videoio/videoio_c.h"
			#include "opencv2/imgcodecs/imgcodecs_c.h"
			#endif

			#include "http_stream.h"

			data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure, int small_object)
			{
			char **random_paths = get_random_paths(paths, n, m);
			@@ -681,10 +692,22 @@

			d.y = make_matrix(n, 5*boxes);
			for(i = 0; i < n; ++i){
			image orig = load_image_color(random_paths[i], 0, 0);
			const char *filename = random_paths[i];

			int oh = orig.h;
			int ow = orig.w;
			int flag = 1;
			IplImage *src;
			if ((src = cvLoadImage(filename, flag)) == 0)
			{
			fprintf(stderr, "Cannot load image \"%s\"\n", filename);
			char buff[256];
			sprintf(buff, "echo %s >> bad.list", filename);
			system(buff);
			continue;
			//exit(0);
			}

			int oh = src->height;
			int ow = src->width;

			int dw = (ow*jitter);
			int dh = (oh*jitter);
			@@ -701,25 +724,80 @@
			float sy = (float)sheight / oh;

			int flip = random_gen()%2;
			image cropped = crop_image(orig, pleft, ptop, swidth, sheight);

			float dx = ((float)pleft/ow)/sx;
			float dy = ((float)ptop /oh)/sy;

			image sized = resize_image(cropped, w, h);
			if(flip) flip_image(sized);
			random_distort_image(sized, hue, saturation, exposure);
			d.X.vals[i] = sized.data;
			float dhue = rand_uniform_strong(-hue, hue);
			float dsat = rand_scale(saturation);
			float dexp = rand_scale(exposure);

			fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy, small_object);
			image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, jitter, dhue, dsat, dexp);
			d.X.vals[i] = ai.data;

			//show_image(ai, "aug");
			//cvWaitKey(0);

			free_image(orig);
			free_image(cropped);
			fill_truth_detection(filename, boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy, small_object);

			cvReleaseImage(&src);
			}
			free(random_paths);
			return d;
			}
			#else // OPENCV
			/*
			 * Build one detection training batch without OpenCV.
			 *
			 * The path list comes from get_random_paths(paths, n, m). Each image is
			 * loaded, cropped with random offsets, resized to w x h, flipped when the
			 * random flag is set, and passed to random_distort_image(). The matching
			 * row of d.y is filled by fill_truth_detection() with the same geometry.
			 *
			 * Returns a `data` whose X has n rows of h*w*3 values and whose y has
			 * n rows of 5*boxes values. Each X row takes over sized.data.
			 */
			data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure, int small_object)
			{
			    char **random_paths = get_random_paths(paths, n, m);
			    int i;
			    data d = { 0 };
			    d.shallow = 0;

			    d.X.rows = n;
			    d.X.vals = calloc(d.X.rows, sizeof(float*));
			    /* three values per pixel of the w x h output */
			    d.X.cols = h*w * 3;

			    d.y = make_matrix(n, 5 * boxes);
			    for (i = 0; i < n; ++i) {
			        image orig = load_image_color(random_paths[i], 0, 0);

			        int oh = orig.h;
			        int ow = orig.w;

			        /* largest offset allowed on each side */
			        int dw = (ow*jitter);
			        int dh = (oh*jitter);

			        int pleft = rand_uniform_strong(-dw, dw);
			        int pright = rand_uniform_strong(-dw, dw);
			        int ptop = rand_uniform_strong(-dh, dh);
			        int pbot = rand_uniform_strong(-dh, dh);

			        /* crop window size after the four offsets */
			        int swidth = ow - pleft - pright;
			        int sheight = oh - ptop - pbot;

			        float sx = (float)swidth / ow;
			        float sy = (float)sheight / oh;

			        int flip = random_gen() % 2;
			        image cropped = crop_image(orig, pleft, ptop, swidth, sheight);

			        /* crop origin expressed in units of the crop size */
			        float dx = ((float)pleft / ow) / sx;
			        float dy = ((float)ptop / oh) / sy;

			        image sized = resize_image(cropped, w, h);
			        if (flip) flip_image(sized);
			        random_distort_image(sized, hue, saturation, exposure);
			        /* the batch row now owns sized.data, so sized is not freed */
			        d.X.vals[i] = sized.data;

			        fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1. / sx, 1. / sy, small_object);

			        free_image(orig);
			        free_image(cropped);
			    }
			    free(random_paths);
			    return d;
			}
			#endif // OPENCV

			void load_thread(void ptr)
			{

			@@ -47,6 +47,7 @@
			using namespace cv;

			#include "http_stream.h"
			#include "image.h"


			class MJPGWriter
			@@ -185,8 +186,7 @@
			return true;
			}
			};


			// ----------------------------------------

			void send_mjpeg(IplImage* ipl, int port, int timeout, int quality) {
			static MJPGWriter wri(port, timeout, quality);
			@@ -194,7 +194,7 @@
			wri.write(mat);
			std::cout << " MJPEG-stream sent. \n";
			}

			// ----------------------------------------

			CvCapture* get_capture_webcam(int index) {
			CvCapture* cap = NULL;
			@@ -208,6 +208,7 @@
			}
			return cap;
			}
			// ----------------------------------------

			IplImage* get_webcam_frame(CvCapture *cap) {
			IplImage* src = NULL;
			@@ -225,6 +226,61 @@
			}
			return src;
			}
			// ----------------------------------------
			// C-linkage declaration of a function that takes an IplImage and returns
			// an `image`; per the trailing note the definition lives in image.c.
			extern "C" {
			image ipl_to_image(IplImage* src); // image.c
			}

			// Crop, resize, optionally mirror and colour-jitter an image with OpenCV,
			// then convert the result to an `image` via ipl_to_image().
			//
			//   ipl              source image; converted with CV_BGR2HSV, so treated as BGR
			//   w, h             size of the output image
			//   pleft, ptop      top-left corner of the crop window; may be negative,
			//                    in which case the uncovered area stays black
			//   swidth, sheight  size of the crop window (expected to be positive)
			//   flip             non-zero mirrors the image around the vertical axis
			//   jitter           not used by this function
			//   dhue             hue shift, scaled by 179 (the 8-bit OpenCV hue range)
			//   dsat, dexp       multipliers for the saturation and value channels
			//
			// Returns the augmented picture with channels in RGB order.
			image image_data_augmentation(IplImage* ipl, int w, int h,
			    int pleft, int ptop, int swidth, int sheight, int flip,
			    float jitter, float dhue, float dsat, float dexp)
			{
			    (void)jitter;   // never read here

			    cv::Mat img = cv::cvarrToMat(ipl);

			    // crop: intersect the requested window with the image, then paste
			    // that part into a black canvas of the requested size
			    const cv::Rect src_rect(pleft, ptop, swidth, sheight);
			    const cv::Rect img_rect(cv::Point2i(0, 0), img.size());
			    const cv::Rect new_src_rect = src_rect & img_rect;

			    const cv::Rect dst_rect(cv::Point2i(std::max(0, -pleft), std::max(0, -ptop)), new_src_rect.size());

			    cv::Mat cropped = cv::Mat::zeros(src_rect.size(), img.type());

			    // An empty intersection means the window lies entirely outside the
			    // image: nothing to copy, and dst_rect may not fit inside the canvas.
			    if (new_src_rect.area() > 0) {
			        img(new_src_rect).copyTo(cropped(dst_rect));
			    }

			    // resize
			    cv::Mat sized;
			    cv::resize(cropped, sized, cv::Size(w, h), 0, 0, INTER_LINEAR);

			    // flip
			    if (flip) {
			        cv::Mat mirrored;
			        cv::flip(sized, mirrored, 1); // 0 - x-axis, 1 - y-axis, -1 - both axes (x & y)
			        sized = mirrored;             // shares the buffer, no deep copy
			    }

			    // HSV augmentation
			    // CV_BGR2HSV, CV_RGB2HSV, CV_HSV2BGR, CV_HSV2RGB
			    cv::Mat hsv_src;
			    cvtColor(sized, hsv_src, CV_BGR2HSV);

			    std::vector<cv::Mat> hsv;
			    cv::split(hsv_src, hsv);

			    hsv[1] *= dsat;
			    hsv[2] *= dexp;

			    if (hsv[0].depth() == CV_8U) {
			        // 8-bit hue is circular over 0..179. A plain `+=` saturates, so a
			        // negative shift would clamp low hues to 0 instead of wrapping.
			        const int hue_shift = cvRound(179 * dhue);
			        cv::Mat hue_lut(1, 256, CV_8UC1);
			        for (int i = 0; i < 256; ++i) {
			            hue_lut.at<uchar>(0, i) = static_cast<uchar>(((i + hue_shift) % 180 + 180) % 180);
			        }
			        cv::Mat shifted_hue;
			        cv::LUT(hsv[0], hue_lut, shifted_hue);
			        hsv[0] = shifted_hue;
			    } else {
			        // other depths keep the plain shift
			        hsv[0] += 179 * dhue;
			    }

			    cv::merge(hsv, hsv_src);

			    // Converting back with HSV2RGB (not HSV2BGR) is what swaps BGR to RGB.
			    cvtColor(hsv_src, sized, CV_HSV2RGB);

			    // Mat -> IplImage -> image
			    IplImage src = sized;
			    image out = ipl_to_image(&src);

			    return out;
			}


			#endif // OPENCV

			@@ -5,11 +5,17 @@
			#ifdef __cplusplus
			extern "C" {
			#endif
			#include "image.h"

			// Writes one frame to the MJPEG stream. The writer is a function-level
			// static constructed from port/timeout/quality, so only the values passed
			// on the first call configure it.
			void send_mjpeg(IplImage* ipl, int port, int timeout, int quality);
			// Returns a capture handle for the webcam with the given index.
			// NOTE(review): presumably NULL when the device cannot be opened — confirm.
			CvCapture* get_capture_webcam(int index);
			// Returns a frame taken from the given capture.
			// NOTE(review): frame ownership is not visible here — confirm.
			IplImage* get_webcam_frame(CvCapture *cap);

			// Crops the window (pleft, ptop, swidth, sheight) out of ipl, resizes it
			// to w x h, mirrors it when flip is non-zero, scales saturation and value
			// by dsat and dexp, shifts hue by dhue, and returns the result as an
			// `image`. The jitter argument is not read by the implementation.
			image image_data_augmentation(IplImage* ipl, int w, int h,
			int pleft, int ptop, int swidth, int sheight, int flip,
			float jitter, float dhue, float dsat, float dexp);

			#ifdef __cplusplus
			}
			#endif