Merge branch 'master' of https://github.com/AlexeyAB/darknet into Fix_get_color_depth
| | |
| | | `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` |
| | | then set the same 9 `anchors` in each of 3 `[yolo]`-layers in your cfg-file |
| | | |
| | | * check that each object in your dataset is labeled - no object in your dataset should be left without a label. Most training issues are caused by wrong labels in your dataset (labels produced by a conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark |
| | | |
| | | * it is desirable that your training dataset includes images with objects at different scales, rotations, and lightings, from different sides, on different backgrounds |
| | | |
| | | * it is desirable that your training dataset includes images with non-labeled objects that you do not want to detect - negative samples without bounding boxes (empty `.txt` files) |
| | | |
| | | * for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last layer [region] in your cfg-file |
| | | |
| | | * for training for small objects - set `layers = -1, 11` instead of https://github.com/AlexeyAB/darknet/blob/6390a5a2ab61a0bdf6f1a9a6b4a739c16b36e0d7/cfg/yolov3.cfg#L720 |
| | | and set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6390a5a2ab61a0bdf6f1a9a6b4a739c16b36e0d7/cfg/yolov3.cfg#L717 |
| | | |
| | | * General rule - you should keep relative size of objects in the Training and Testing datasets roughly the same: |
| | | |
| | | * `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width` |
| | | * `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height` |
| | | |
| | | * to speedup training (with decreasing detection accuracy) do Fine-Tuning instead of Transfer-Learning, set param `stopbackward=1` in one of the penultimate convolutional layers before the 1-st `[yolo]`-layer, for example here: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L598 |
| | | |
| | | 2. After training - for detection: |
| | |
| | | darknet.exe partial cfg/yolov2.cfg yolov2.weights yolov2.conv.23 23 |
| | | |
| | | |
| | | darknet.exe partial cfg/yolov3.cfg yolov3.weights yolov3.conv.105 105 |
| | | darknet.exe partial cfg/yolov3.cfg yolov3.weights yolov3.conv.81 81 |
| | | |
| | | |
| | | darknet.exe partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 |
| | |
| | | // not detect small objects |
| | | //if ((w < 0.001F || h < 0.001F)) continue; |
| | | // if truth (box for object) is smaller than 1x1 pix |
| | | if ((w < lowest_w || h < lowest_h)) continue; |
| | | char buff[256]; |
| | | if (id >= classes) { |
| | | printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d] \n", id, classes); |
| | | sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, classes); |
| | | system(buff); |
| | | getchar(); |
| | | continue; |
| | | } |
| | | if ((w < lowest_w || h < lowest_h)) { |
| | | //sprintf(buff, "echo %s \"Very small object: w < lowest_w OR h < lowest_h\" >> bad_label.list", labelpath); |
| | | //system(buff); |
| | | continue; |
| | | } |
| | | if (x == 999999 || y == 999999) { |
| | | printf("\n Wrong annotation: x = 0, y = 0 \n"); |
| | | sprintf(buff, "echo %s \"Wrong annotation: x = 0 or y = 0\" >> bad_label.list", labelpath); |
| | | system(buff); |
| | | continue; |
| | | } |
| | | if (x < 0 || x > 1 || y < 0 || y > 1) { |
| | | printf("\n Wrong annotation: x = %f, y = %f \n", x, y); |
| | | sprintf(buff, "echo %s \"Wrong annotation: x = %f, y = %f\" >> bad_label.list", labelpath, x, y); |
| | | system(buff); |
| | | continue; |
| | | } |
| | | if (w > 1) printf("\n Wrong annotation: w = %f \n", w), w = 1; |
| | | if (h > 1) printf("\n Wrong annotation: h = %f \n", h), h = 1; |
| | | if (w > 1) { |
| | | printf("\n Wrong annotation: w = %f \n", w); |
| | | sprintf(buff, "echo %s \"Wrong annotation: w = %f\" >> bad_label.list", labelpath, w); |
| | | system(buff); |
| | | w = 1; |
| | | } |
| | | if (h > 1) { |
| | | printf("\n Wrong annotation: h = %f \n", h); |
| | | sprintf(buff, "echo %s \"Wrong annotation: h = %f\" >> bad_label.list", labelpath, h); |
| | | system(buff); |
| | | h = 1; |
| | | } |
| | | if (x == 0) x += lowest_w; |
| | | if (y == 0) y += lowest_h; |
| | | |
| | |
| | | |
| | | #include "http_stream.h" |
| | | |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object) |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object) |
| | | { |
| | | c = c ? c : 3; |
| | | char **random_paths = get_random_paths(paths, n, m); |
| | | int i; |
| | | data d = {0}; |
| | |
| | | |
| | | d.X.rows = n; |
| | | d.X.vals = calloc(d.X.rows, sizeof(float*)); |
| | | d.X.cols = h*w*3; |
| | | d.X.cols = h*w*c; |
| | | |
| | | d.y = make_matrix(n, 5*boxes); |
| | | for(i = 0; i < n; ++i){ |
| | | const char *filename = random_paths[i]; |
| | | |
| | | int flag = 1; |
| | | int flag = (c >= 3); |
| | | IplImage *src; |
| | | if ((src = cvLoadImage(filename, flag)) == 0) |
| | | { |
| | |
| | | return d; |
| | | } |
| | | #else // OPENCV |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object) |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object) |
| | | { |
| | | c = c ? c : 3; |
| | | char **random_paths = get_random_paths(paths, n, m); |
| | | int i; |
| | | data d = { 0 }; |
| | |
| | | |
| | | d.X.rows = n; |
| | | d.X.vals = calloc(d.X.rows, sizeof(float*)); |
| | | d.X.cols = h*w * 3; |
| | | d.X.cols = h*w*c; |
| | | |
| | | d.y = make_matrix(n, 5 * boxes); |
| | | for (i = 0; i < n; ++i) { |
| | | image orig = load_image_color(random_paths[i], 0, 0); |
| | | image orig = load_image(random_paths[i], 0, 0, c); |
| | | |
| | | int oh = orig.h; |
| | | int ow = orig.w; |
| | |
| | | } else if (a.type == REGION_DATA){ |
| | | *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); |
| | | } else if (a.type == DETECTION_DATA){ |
| | | *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.flip, a.jitter, a.hue, a.saturation, a.exposure, a.small_object); |
| | | *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.jitter, a.hue, a.saturation, a.exposure, a.small_object); |
| | | } else if (a.type == SWAG_DATA){ |
| | | *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); |
| | | } else if (a.type == COMPARE_DATA){ |
| | | *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); |
| | | } else if (a.type == IMAGE_DATA){ |
| | | *(a.im) = load_image_color(a.path, 0, 0); |
| | | *(a.im) = load_image(a.path, 0, 0, a.c); |
| | | *(a.resized) = resize_image(*(a.im), a.w, a.h); |
| | | }else if (a.type == LETTERBOX_DATA) { |
| | | *(a.im) = load_image_color(a.path, 0, 0); |
| | | *(a.im) = load_image(a.path, 0, 0, a.c); |
| | | *(a.resized) = letterbox_image(*(a.im), a.w, a.h); |
| | | } else if (a.type == TAG_DATA){ |
| | | *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.flip, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); |
| | |
| | | char **labels; |
| | | int h; |
| | | int w; |
| | | int c; // color depth |
| | | int out_w; |
| | | int out_h; |
| | | int nh; |
| | |
| | | data load_data_captcha(char **paths, int n, int m, int k, int w, int h); |
| | | data load_data_captcha_encode(char **paths, int n, int m, int w, int h); |
| | | data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object); |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object); |
| | | data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); |
| | | matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); |
| | | data load_data_super(char **paths, int n, int m, int w, int h, int scale); |
| | |
| | | void draw_detections_cv(IplImage* show_img, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes); |
| | | void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output); |
| | | void show_image_cv_ipl(IplImage *disp, const char *name); |
| | | image get_image_from_stream_resize(CvCapture *cap, int w, int h, IplImage** in_img, int cpp_video_capture); |
| | | image get_image_from_stream_resize(CvCapture *cap, int w, int h, int c, IplImage** in_img, int cpp_video_capture, int dont_close); |
| | | IplImage* in_img; |
| | | IplImage* det_img; |
| | | IplImage* show_img; |
| | |
| | | void *fetch_in_thread(void *ptr) |
| | | { |
| | | //in = get_image_from_stream(cap); |
| | | in_s = get_image_from_stream_resize(cap, net.w, net.h, &in_img, cpp_video_capture); |
| | | int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream |
| | | in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, cpp_video_capture, dont_close_stream); |
| | | if(!in_s.data){ |
| | | //error("Stream closed."); |
| | | printf("Stream closed.\n"); |
| | |
| | | load_args args = {0}; |
| | | args.w = net.w; |
| | | args.h = net.h; |
| | | args.c = net.c; |
| | | args.paths = paths; |
| | | args.n = imgs; |
| | | args.m = plist->size; |
| | |
| | | args.hue = net.hue; |
| | | |
| | | #ifdef OPENCV |
| | | args.threads = 3; |
| | | args.threads = 3 * ngpus; |
| | | IplImage* img = NULL; |
| | | float max_img_loss = 5; |
| | | int number_of_lines = 100; |
| | |
| | | load_args args = { 0 }; |
| | | args.w = net.w; |
| | | args.h = net.h; |
| | | args.c = net.c; |
| | | args.type = IMAGE_DATA; |
| | | //args.type = LETTERBOX_DATA; |
| | | |
| | |
| | | |
| | | for (i = 0; i < m; ++i) { |
| | | char *path = paths[i]; |
| | | image orig = load_image_color(path, 0, 0); |
| | | image orig = load_image(path, 0, 0, net.c); |
| | | image sized = resize_image(orig, net.w, net.h); |
| | | char *id = basecfg(path); |
| | | network_predict(net, sized.data); |
| | |
| | | load_args args = { 0 }; |
| | | args.w = net.w; |
| | | args.h = net.h; |
| | | args.c = net.c; |
| | | args.type = IMAGE_DATA; |
| | | //args.type = LETTERBOX_DATA; |
| | | |
| | |
| | | if(!input) return; |
| | | strtok(input, "\n"); |
| | | } |
| | | image im = load_image_color(input,0,0); |
| | | image im = load_image(input,0,0,net.c); |
| | | int letterbox = 0; |
| | | //image sized = resize_image(im, net.w, net.h); |
| | | image sized = letterbox_image(im, net.w, net.h); letterbox = 1; |
| | | image sized = resize_image(im, net.w, net.h); |
| | | //image sized = letterbox_image(im, net.w, net.h); letterbox = 1; |
| | | layer l = net.layers[net.n-1]; |
| | | |
| | | //box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); |
| | |
| | | using std::endl; |
| | | |
| | | #include "opencv2/opencv.hpp" |
| | | #include "opencv2/highgui.hpp" |
| | | #include "opencv2/highgui/highgui.hpp" |
| | | #include "opencv2/highgui/highgui_c.h" |
| | | #include "opencv2/imgproc/imgproc_c.h" |
| | | #ifndef CV_VERSION_EPOCH |
| | |
| | | |
| | | // HSV augmentation |
| | | // CV_BGR2HSV, CV_RGB2HSV, CV_HSV2BGR, CV_HSV2RGB |
| | | if (ipl->nChannels >= 3) |
| | | { |
| | | cv::Mat hsv_src; |
| | | cvtColor(sized, hsv_src, CV_BGR2HSV); // also BGR -> RGB |
| | | |
| | |
| | | cv::merge(hsv, hsv_src); |
| | | |
| | | cvtColor(hsv_src, sized, CV_HSV2RGB); // now RGB instead of BGR |
| | | } |
| | | else |
| | | { |
| | | sized *= dexp; |
| | | } |
| | | |
| | | // Mat -> IplImage -> image |
| | | IplImage src = sized; |
| | |
| | | { |
| | | IplImage* src = 0; |
| | | int flag = -1; |
| | | if (channels == 0) flag = -1; |
| | | if (channels == 0) flag = 1; |
| | | else if (channels == 1) flag = 0; |
| | | else if (channels == 3) flag = 1; |
| | | else { |
| | |
| | | } |
| | | image out = ipl_to_image(src); |
| | | cvReleaseImage(&src); |
| | | if (out.c > 1) |
| | | rgbgr_image(out); |
| | | return out; |
| | | } |
| | |
| | | return im; |
| | | } |
| | | |
| | | image get_image_from_stream_resize(CvCapture *cap, int w, int h, IplImage** in_img, int cpp_video_capture) |
| | | image get_image_from_stream_resize(CvCapture *cap, int w, int h, int c, IplImage** in_img, int cpp_video_capture, int dont_close) |
| | | { |
| | | c = c ? c : 3; |
| | | IplImage* src; |
| | | if (cpp_video_capture) { |
| | | static int once = 1; |
| | |
| | | } |
| | | else src = cvQueryFrame(cap); |
| | | |
| | | if (!src) return make_empty_image(0, 0, 0); |
| | | if (src->width < 1 || src->height < 1 || src->nChannels < 1) return make_empty_image(0, 0, 0); |
| | | IplImage* new_img = cvCreateImage(cvSize(w, h), IPL_DEPTH_8U, 3); |
| | | *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, 3); |
| | | if (!src) { |
| | | if (dont_close) src = cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, c); |
| | | else return make_empty_image(0, 0, 0); |
| | | } |
| | | if (src->width < 1 || src->height < 1 || src->nChannels < 1) { |
| | | if (cpp_video_capture) cvReleaseImage(&src); |
| | | if (dont_close) src = cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, c); |
| | | else return make_empty_image(0, 0, 0); |
| | | } |
| | | IplImage* new_img = cvCreateImage(cvSize(w, h), IPL_DEPTH_8U, c); |
| | | *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, c); |
| | | cvResize(src, *in_img, CV_INTER_LINEAR); |
| | | cvResize(src, new_img, CV_INTER_LINEAR); |
| | | image im = ipl_to_image(new_img); |
| | | cvReleaseImage(&new_img); |
| | | if (cpp_video_capture) cvReleaseImage(&src); |
| | | if (c>1) |
| | | rgbgr_image(im); |
| | | return im; |
| | | } |
| | |
| | | |
| | | void distort_image(image im, float hue, float sat, float val) |
| | | { |
| | | if (im.c >= 3) |
| | | { |
| | | rgb_to_hsv(im); |
| | | scale_image_channel(im, 1, sat); |
| | | scale_image_channel(im, 2, val); |
| | |
| | | if (im.data[i] < 0) im.data[i] += 1; |
| | | } |
| | | hsv_to_rgb(im); |
| | | } |
| | | else |
| | | { |
| | | scale_image_channel(im, 0, val); |
| | | } |
| | | constrain_image(im); |
| | | } |
| | | |
| | |
| | | box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); |
| | | float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); |
| | | int i, j; |
| | | for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); |
| | | for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float)); |
| | | get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map); |
| | | for (j = 0; j < l.w*l.h*l.n; ++j) { |
| | | dets[j].classes = l.classes; |
| | |
| | | |
| | | int get_network_nuisance(network net); |
| | | int get_network_background(network net); |
| | | void fuse_conv_batchnorm(network net); |
| | | YOLODLL_API void fuse_conv_batchnorm(network net); |
| | | |
| | | #ifdef __cplusplus |
| | | } |
| | |
| | | |
| | | char *a = option_find_str(options, "mask", 0); |
| | | int *mask = parse_yolo_mask(a, &num); |
| | | int max_boxes = option_find_int_quiet(options, "max", 30); |
| | | int max_boxes = option_find_int_quiet(options, "max", 90); |
| | | layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); |
| | | if (l.outputs != params.inputs) { |
| | | printf("Error: l.outputs == params.inputs \n"); |
| | |
| | | int coords = option_find_int(options, "coords", 4); |
| | | int classes = option_find_int(options, "classes", 20); |
| | | int num = option_find_int(options, "num", 1); |
| | | int max_boxes = option_find_int_quiet(options, "max", 30); |
| | | int max_boxes = option_find_int_quiet(options, "max", 90); |
| | | |
| | | layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes); |
| | | if (l.outputs != params.inputs) { |
| | |
| | | box truth = float_to_box(state.truth + t*5 + b*l.truths); |
| | | int class_id = state.truth[t * 5 + b*l.truths + 4]; |
| | | if (class_id >= l.classes) { |
| | | printf("Warning: in txt-labels class_id=%d >= classes=%d in cfg-file\n", class_id, l.classes); |
| | | printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes-1); |
| | | getchar(); |
| | | continue; // if label contains class_id more than number of classes in the cfg-file |
| | | } |
| | | |
| | |
| | | //find_replace(output_path, "JPEGImages", "labels", output_path); // PascalVOC |
| | | find_replace(output_path, "VOC2007/JPEGImages", "VOC2007/labels", output_path); // PascalVOC |
| | | find_replace(output_path, "VOC2012/JPEGImages", "VOC2012/labels", output_path); // PascalVOC |
| | | |
| | | //find_replace(output_path, "/raw/", "/labels/", output_path); |
| | | |
| | | // replace only ext of files |
| | | find_replace_extension(output_path, ".jpg", ".txt", output_path); |
| | | find_replace_extension(output_path, ".JPG", ".txt", output_path); // error |
| | |
| | | box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); |
| | | int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; |
| | | if (class_id >= l.classes) { |
| | | printf("Warning: in txt-labels class_id=%d >= classes=%d in cfg-file\n", class_id, l.classes); |
| | | printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); |
| | | getchar(); |
| | | continue; // if label contains class_id more than number of classes in the cfg-file |
| | | } |
| | | if(!truth.x) break; |