From f0abcfa02b2094396f955c743f7f11fcdb2e3d13 Mon Sep 17 00:00:00 2001
From: IlyaOvodov <b@ovdv.ru>
Date: Mon, 04 Jun 2018 15:57:15 +0000
Subject: [PATCH] Merge branch 'master' of https://github.com/AlexeyAB/darknet into Fix_get_color_depth
---
src/network.c | 2
src/network.h | 2
build/darknet/x64/partial.cmd | 2
src/data.c | 52 ++++++++++---
src/http_stream.cpp | 29 ++++--
src/region_layer.c | 3
src/yolo_layer.c | 3
src/data.h | 5
README.md | 10 ++
src/image.c | 53 ++++++++----
src/demo.c | 5
src/parser.c | 4
src/detector.c | 15 ++-
src/utils.c | 3
14 files changed, 130 insertions(+), 58 deletions(-)
diff --git a/README.md b/README.md
index e56efa8..7cf0b77 100644
--- a/README.md
+++ b/README.md
@@ -415,12 +415,22 @@
`darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416`
then set the same 9 `anchors` in each of 3 `[yolo]`-layers in your cfg-file
+ * check that each object is mandatorily labeled in your dataset - no object in your dataset should be left without a label. In most training issues there are wrong labels in your dataset (labels obtained using some conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark
+
* desirable that your training dataset include images with objects at diffrent: scales, rotations, lightings, from different sides, on different backgrounds
* desirable that your training dataset include images with non-labeled objects that you do not want to detect - negative samples without bounded box (empty `.txt` files)
* for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last layer [region] in your cfg-file
+ * when training for small objects - set `layers = -1, 11` instead of https://github.com/AlexeyAB/darknet/blob/6390a5a2ab61a0bdf6f1a9a6b4a739c16b36e0d7/cfg/yolov3.cfg#L720
+ and set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6390a5a2ab61a0bdf6f1a9a6b4a739c16b36e0d7/cfg/yolov3.cfg#L717
+
+ * General rule - you should keep relative size of objects in the Training and Testing datasets roughly the same:
+
+ * `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width`
+ * `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height`
+
* to speedup training (with decreasing detection accuracy) do Fine-Tuning instead of Transfer-Learning, set param `stopbackward=1` in one of the penultimate convolutional layers before the 1-st `[yolo]`-layer, for example here: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L598
2. After training - for detection:
diff --git a/build/darknet/x64/partial.cmd b/build/darknet/x64/partial.cmd
index a6ac262..acbe3fa 100644
--- a/build/darknet/x64/partial.cmd
+++ b/build/darknet/x64/partial.cmd
@@ -18,7 +18,7 @@
darknet.exe partial cfg/yolov2.cfg yolov2.weights yolov2.conv.23 23
-darknet.exe partial cfg/yolov3.cfg yolov3.weights yolov3.conv.105 105
+darknet.exe partial cfg/yolov3.cfg yolov3.weights yolov3.conv.81 81
darknet.exe partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15
diff --git a/src/data.c b/src/data.c
index a15bc1d..c98ede9 100644
--- a/src/data.c
+++ b/src/data.c
@@ -322,17 +322,43 @@
// not detect small objects
//if ((w < 0.001F || h < 0.001F)) continue;
// if truth (box for object) is smaller than 1x1 pix
- if ((w < lowest_w || h < lowest_h)) continue;
+ char buff[256];
+ if (id >= classes) {
+ printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d] \n", id, classes);
+ sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, classes);
+ system(buff);
+ getchar();
+ continue;
+ }
+ if ((w < lowest_w || h < lowest_h)) {
+ //sprintf(buff, "echo %s \"Very small object: w < lowest_w OR h < lowest_h\" >> bad_label.list", labelpath);
+ //system(buff);
+ continue;
+ }
if (x == 999999 || y == 999999) {
printf("\n Wrong annotation: x = 0, y = 0 \n");
+ sprintf(buff, "echo %s \"Wrong annotation: x = 0 or y = 0\" >> bad_label.list", labelpath);
+ system(buff);
continue;
}
if (x < 0 || x > 1 || y < 0 || y > 1) {
printf("\n Wrong annotation: x = %f, y = %f \n", x, y);
+ sprintf(buff, "echo %s \"Wrong annotation: x = %f, y = %f\" >> bad_label.list", labelpath, x, y);
+ system(buff);
continue;
}
- if (w > 1) printf("\n Wrong annotation: w = %f \n", w), w = 1;
- if (h > 1) printf("\n Wrong annotation: h = %f \n", h), h = 1;
+ if (w > 1) {
+ printf("\n Wrong annotation: w = %f \n", w);
+ sprintf(buff, "echo %s \"Wrong annotation: w = %f\" >> bad_label.list", labelpath, w);
+ system(buff);
+ w = 1;
+ }
+ if (h > 1) {
+ printf("\n Wrong annotation: h = %f \n", h);
+ sprintf(buff, "echo %s \"Wrong annotation: h = %f\" >> bad_label.list", labelpath, h);
+ system(buff);
+ h = 1;
+ }
if (x == 0) x += lowest_w;
if (y == 0) y += lowest_h;
@@ -687,8 +713,9 @@
#include "http_stream.h"
-data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object)
+data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object)
{
+ c = c ? c : 3;
char **random_paths = get_random_paths(paths, n, m);
int i;
data d = {0};
@@ -696,13 +723,13 @@
d.X.rows = n;
d.X.vals = calloc(d.X.rows, sizeof(float*));
- d.X.cols = h*w*3;
+ d.X.cols = h*w*c;
d.y = make_matrix(n, 5*boxes);
for(i = 0; i < n; ++i){
const char *filename = random_paths[i];
- int flag = 1;
+ int flag = (c >= 3);
IplImage *src;
if ((src = cvLoadImage(filename, flag)) == 0)
{
@@ -754,8 +781,9 @@
return d;
}
#else // OPENCV
-data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object)
+data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object)
{
+ c = c ? c : 3;
char **random_paths = get_random_paths(paths, n, m);
int i;
data d = { 0 };
@@ -763,11 +791,11 @@
d.X.rows = n;
d.X.vals = calloc(d.X.rows, sizeof(float*));
- d.X.cols = h*w * 3;
+ d.X.cols = h*w*c;
d.y = make_matrix(n, 5 * boxes);
for (i = 0; i < n; ++i) {
- image orig = load_image_color(random_paths[i], 0, 0);
+ image orig = load_image(random_paths[i], 0, 0, c);
int oh = orig.h;
int ow = orig.w;
@@ -827,16 +855,16 @@
} else if (a.type == REGION_DATA){
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
} else if (a.type == DETECTION_DATA){
- *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.flip, a.jitter, a.hue, a.saturation, a.exposure, a.small_object);
+ *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.jitter, a.hue, a.saturation, a.exposure, a.small_object);
} else if (a.type == SWAG_DATA){
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
} else if (a.type == COMPARE_DATA){
*a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h);
} else if (a.type == IMAGE_DATA){
- *(a.im) = load_image_color(a.path, 0, 0);
+ *(a.im) = load_image(a.path, 0, 0, a.c);
*(a.resized) = resize_image(*(a.im), a.w, a.h);
}else if (a.type == LETTERBOX_DATA) {
- *(a.im) = load_image_color(a.path, 0, 0);
+ *(a.im) = load_image(a.path, 0, 0, a.c);
*(a.resized) = letterbox_image(*(a.im), a.w, a.h);
} else if (a.type == TAG_DATA){
*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.flip, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
diff --git a/src/data.h b/src/data.h
index 57f4702..b46143f 100644
--- a/src/data.h
+++ b/src/data.h
@@ -44,7 +44,8 @@
char **labels;
int h;
int w;
- int out_w;
+ int c; // color depth
+ int out_w;
int out_h;
int nh;
int nw;
@@ -84,7 +85,7 @@
data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h);
-data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object);
+data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object);
data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure);
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
diff --git a/src/demo.c b/src/demo.c
index 81eddb2..0a6a4cb 100644
--- a/src/demo.c
+++ b/src/demo.c
@@ -51,7 +51,7 @@
void draw_detections_cv(IplImage* show_img, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes);
void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output);
void show_image_cv_ipl(IplImage *disp, const char *name);
-image get_image_from_stream_resize(CvCapture *cap, int w, int h, IplImage** in_img, int cpp_video_capture);
+image get_image_from_stream_resize(CvCapture *cap, int w, int h, int c, IplImage** in_img, int cpp_video_capture, int dont_close);
IplImage* in_img;
IplImage* det_img;
IplImage* show_img;
@@ -61,7 +61,8 @@
void *fetch_in_thread(void *ptr)
{
//in = get_image_from_stream(cap);
- in_s = get_image_from_stream_resize(cap, net.w, net.h, &in_img, cpp_video_capture);
+ int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream
+ in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, cpp_video_capture, dont_close_stream);
if(!in_s.data){
//error("Stream closed.");
printf("Stream closed.\n");
diff --git a/src/detector.c b/src/detector.c
index 6fc6b67..e099e91 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -87,7 +87,8 @@
load_args args = {0};
args.w = net.w;
args.h = net.h;
- args.paths = paths;
+ args.c = net.c;
+ args.paths = paths;
args.n = imgs;
args.m = plist->size;
args.classes = classes;
@@ -105,7 +106,7 @@
args.hue = net.hue;
#ifdef OPENCV
- args.threads = 3;
+ args.threads = 3 * ngpus;
IplImage* img = NULL;
float max_img_loss = 5;
int number_of_lines = 100;
@@ -388,6 +389,7 @@
load_args args = { 0 };
args.w = net.w;
args.h = net.h;
+ args.c = net.c;
args.type = IMAGE_DATA;
//args.type = LETTERBOX_DATA;
@@ -482,7 +484,7 @@
for (i = 0; i < m; ++i) {
char *path = paths[i];
- image orig = load_image_color(path, 0, 0);
+ image orig = load_image(path, 0, 0, net.c);
image sized = resize_image(orig, net.w, net.h);
char *id = basecfg(path);
network_predict(net, sized.data);
@@ -595,6 +597,7 @@
load_args args = { 0 };
args.w = net.w;
args.h = net.h;
+ args.c = net.c;
args.type = IMAGE_DATA;
//args.type = LETTERBOX_DATA;
@@ -1093,10 +1096,10 @@
if(!input) return;
strtok(input, "\n");
}
- image im = load_image_color(input,0,0);
+ image im = load_image(input,0,0,net.c);
int letterbox = 0;
- //image sized = resize_image(im, net.w, net.h);
- image sized = letterbox_image(im, net.w, net.h); letterbox = 1;
+ image sized = resize_image(im, net.w, net.h);
+ //image sized = letterbox_image(im, net.w, net.h); letterbox = 1;
layer l = net.layers[net.n-1];
//box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
diff --git a/src/http_stream.cpp b/src/http_stream.cpp
index 9192f75..1b65173 100644
--- a/src/http_stream.cpp
+++ b/src/http_stream.cpp
@@ -44,7 +44,7 @@
using std::endl;
#include "opencv2/opencv.hpp"
-#include "opencv2/highgui.hpp"
+#include "opencv2/highgui/highgui.hpp"
#include "opencv2/highgui/highgui_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#ifndef CV_VERSION_EPOCH
@@ -283,19 +283,26 @@
// HSV augmentation
// CV_BGR2HSV, CV_RGB2HSV, CV_HSV2BGR, CV_HSV2RGB
- cv::Mat hsv_src;
- cvtColor(sized, hsv_src, CV_BGR2HSV); // also BGR -> RGB
+ if (ipl->nChannels >= 3)
+ {
+ cv::Mat hsv_src;
+ cvtColor(sized, hsv_src, CV_BGR2HSV); // also BGR -> RGB
- std::vector<cv::Mat> hsv;
- cv::split(hsv_src, hsv);
+ std::vector<cv::Mat> hsv;
+ cv::split(hsv_src, hsv);
- hsv[1] *= dsat;
- hsv[2] *= dexp;
- hsv[0] += 179 * dhue;
+ hsv[1] *= dsat;
+ hsv[2] *= dexp;
+ hsv[0] += 179 * dhue;
- cv::merge(hsv, hsv_src);
+ cv::merge(hsv, hsv_src);
- cvtColor(hsv_src, sized, CV_HSV2RGB); // now RGB instead of BGR
+ cvtColor(hsv_src, sized, CV_HSV2RGB); // now RGB instead of BGR
+ }
+ else
+ {
+ sized *= dexp;
+ }
// Mat -> IplImage -> image
IplImage src = sized;
@@ -305,4 +312,4 @@
}
-#endif // OPENCV
\ No newline at end of file
+#endif // OPENCV
diff --git a/src/image.c b/src/image.c
index 7545e7d..35e8551 100644
--- a/src/image.c
+++ b/src/image.c
@@ -957,7 +957,7 @@
{
IplImage* src = 0;
int flag = -1;
- if (channels == 0) flag = -1;
+ if (channels == 0) flag = 1;
else if (channels == 1) flag = 0;
else if (channels == 3) flag = 1;
else {
@@ -975,7 +975,8 @@
}
image out = ipl_to_image(src);
cvReleaseImage(&src);
- rgbgr_image(out);
+ if (out.c > 1)
+ rgbgr_image(out);
return out;
}
@@ -1010,8 +1011,9 @@
return im;
}
-image get_image_from_stream_resize(CvCapture *cap, int w, int h, IplImage** in_img, int cpp_video_capture)
+image get_image_from_stream_resize(CvCapture *cap, int w, int h, int c, IplImage** in_img, int cpp_video_capture, int dont_close)
{
+ c = c ? c : 3;
IplImage* src;
if (cpp_video_capture) {
static int once = 1;
@@ -1027,16 +1029,24 @@
}
else src = cvQueryFrame(cap);
- if (!src) return make_empty_image(0, 0, 0);
- if (src->width < 1 || src->height < 1 || src->nChannels < 1) return make_empty_image(0, 0, 0);
- IplImage* new_img = cvCreateImage(cvSize(w, h), IPL_DEPTH_8U, 3);
- *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, 3);
+ if (!src) {
+ if (dont_close) src = cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, c);
+ else return make_empty_image(0, 0, 0);
+ }
+ if (src->width < 1 || src->height < 1 || src->nChannels < 1) {
+ if (cpp_video_capture) cvReleaseImage(&src);
+ if (dont_close) src = cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, c);
+ else return make_empty_image(0, 0, 0);
+ }
+ IplImage* new_img = cvCreateImage(cvSize(w, h), IPL_DEPTH_8U, c);
+ *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, c);
cvResize(src, *in_img, CV_INTER_LINEAR);
cvResize(src, new_img, CV_INTER_LINEAR);
image im = ipl_to_image(new_img);
cvReleaseImage(&new_img);
if (cpp_video_capture) cvReleaseImage(&src);
- rgbgr_image(im);
+ if (c>1)
+ rgbgr_image(im);
return im;
}
@@ -1589,16 +1599,23 @@
void distort_image(image im, float hue, float sat, float val)
{
- rgb_to_hsv(im);
- scale_image_channel(im, 1, sat);
- scale_image_channel(im, 2, val);
- int i;
- for(i = 0; i < im.w*im.h; ++i){
- im.data[i] = im.data[i] + hue;
- if (im.data[i] > 1) im.data[i] -= 1;
- if (im.data[i] < 0) im.data[i] += 1;
- }
- hsv_to_rgb(im);
+ if (im.c >= 3)
+ {
+ rgb_to_hsv(im);
+ scale_image_channel(im, 1, sat);
+ scale_image_channel(im, 2, val);
+ int i;
+ for(i = 0; i < im.w*im.h; ++i){
+ im.data[i] = im.data[i] + hue;
+ if (im.data[i] > 1) im.data[i] -= 1;
+ if (im.data[i] < 0) im.data[i] += 1;
+ }
+ hsv_to_rgb(im);
+ }
+ else
+ {
+ scale_image_channel(im, 0, val);
+ }
constrain_image(im);
}
diff --git a/src/network.c b/src/network.c
index 6e50d54..050d334 100644
--- a/src/network.c
+++ b/src/network.c
@@ -582,7 +582,7 @@
box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
int i, j;
- for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
+ for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float));
get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map);
for (j = 0; j < l.w*l.h*l.n; ++j) {
dets[j].classes = l.classes;
diff --git a/src/network.h b/src/network.h
index 01a6ab9..4198c4b 100644
--- a/src/network.h
+++ b/src/network.h
@@ -147,7 +147,7 @@
int get_network_nuisance(network net);
int get_network_background(network net);
-void fuse_conv_batchnorm(network net);
+YOLODLL_API void fuse_conv_batchnorm(network net);
#ifdef __cplusplus
}
diff --git a/src/parser.c b/src/parser.c
index 188ba54..c1ee98c 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -268,7 +268,7 @@
char *a = option_find_str(options, "mask", 0);
int *mask = parse_yolo_mask(a, &num);
- int max_boxes = option_find_int_quiet(options, "max", 30);
+ int max_boxes = option_find_int_quiet(options, "max", 90);
layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes);
if (l.outputs != params.inputs) {
printf("Error: l.outputs == params.inputs \n");
@@ -310,7 +310,7 @@
int coords = option_find_int(options, "coords", 4);
int classes = option_find_int(options, "classes", 20);
int num = option_find_int(options, "num", 1);
- int max_boxes = option_find_int_quiet(options, "max", 30);
+ int max_boxes = option_find_int_quiet(options, "max", 90);
layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes);
if (l.outputs != params.inputs) {
diff --git a/src/region_layer.c b/src/region_layer.c
index a2ca440..4e1e03a 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -297,7 +297,8 @@
box truth = float_to_box(state.truth + t*5 + b*l.truths);
int class_id = state.truth[t * 5 + b*l.truths + 4];
if (class_id >= l.classes) {
- printf("Warning: in txt-labels class_id=%d >= classes=%d in cfg-file\n", class_id, l.classes);
+ printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes-1);
+ getchar();
continue; // if label contains class_id more than number of classes in the cfg-file
}
diff --git a/src/utils.c b/src/utils.c
index 7b25e9c..f1cc21a 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -212,6 +212,9 @@
//find_replace(output_path, "JPEGImages", "labels", output_path); // PascalVOC
find_replace(output_path, "VOC2007/JPEGImages", "VOC2007/labels", output_path); // PascalVOC
find_replace(output_path, "VOC2012/JPEGImages", "VOC2012/labels", output_path); // PascalVOC
+
+ //find_replace(output_path, "/raw/", "/labels/", output_path);
+
// replace only ext of files
find_replace_extension(output_path, ".jpg", ".txt", output_path);
find_replace_extension(output_path, ".JPG", ".txt", output_path); // error
diff --git a/src/yolo_layer.c b/src/yolo_layer.c
index c1309c8..f79bc41 100644
--- a/src/yolo_layer.c
+++ b/src/yolo_layer.c
@@ -202,7 +202,8 @@
box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1);
int class_id = state.truth[t*(4 + 1) + b*l.truths + 4];
if (class_id >= l.classes) {
- printf("Warning: in txt-labels class_id=%d >= classes=%d in cfg-file\n", class_id, l.classes);
+ printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1);
+ getchar();
continue; // if label contains class_id more than number of classes in the cfg-file
}
if(!truth.x) break;
--
Gitblit v1.10.0