Merge branch 'master' of https://github.com/AlexeyAB/darknet into Fix_get_color_depth
| | |
| | | `darknet.exe detector calc_anchors data/obj.data -num_of_clusters 9 -width 416 -height 416` |
| | | then set the same 9 `anchors` in each of 3 `[yolo]`-layers in your cfg-file |
| | | |
| | | * check that each object in your dataset is labeled - no object in your dataset should be left without a label. Most training issues are caused by wrong labels in your dataset (labels produced by a conversion script, marked with a third-party tool, ...). Always check your dataset by using: https://github.com/AlexeyAB/Yolo_mark |
| | | |
| | | * it is desirable that your training dataset includes images with objects at different scales, rotations, and lightings, from different sides, on different backgrounds |
| | | |
| | | * it is desirable that your training dataset includes images with non-labeled objects that you do not want to detect - negative samples without bounding boxes (empty `.txt` files) |
| | | |
| | | * for training with a large number of objects in each image, add the parameter `max=200` or higher value in the last layer [region] in your cfg-file |
| | | |
| | | * for training for small objects - set `layers = -1, 11` instead of https://github.com/AlexeyAB/darknet/blob/6390a5a2ab61a0bdf6f1a9a6b4a739c16b36e0d7/cfg/yolov3.cfg#L720 |
| | | and set `stride=4` instead of https://github.com/AlexeyAB/darknet/blob/6390a5a2ab61a0bdf6f1a9a6b4a739c16b36e0d7/cfg/yolov3.cfg#L717 |
| | | |
| | | * General rule - you should keep relative size of objects in the Training and Testing datasets roughly the same: |
| | | |
| | | * `train_network_width * train_obj_width / train_image_width ~= detection_network_width * detection_obj_width / detection_image_width` |
| | | * `train_network_height * train_obj_height / train_image_height ~= detection_network_height * detection_obj_height / detection_image_height` |
| | | |
| | | * to speedup training (with decreasing detection accuracy) do Fine-Tuning instead of Transfer-Learning, set param `stopbackward=1` in one of the penultimate convolutional layers before the 1-st `[yolo]`-layer, for example here: https://github.com/AlexeyAB/darknet/blob/0039fd26786ab5f71d5af725fc18b3f521e7acfd/cfg/yolov3.cfg#L598 |
| | | |
| | | 2. After training - for detection: |
| | |
| | | darknet.exe partial cfg/yolov2.cfg yolov2.weights yolov2.conv.23 23 |
| | | |
| | | |
| | | darknet.exe partial cfg/yolov3.cfg yolov3.weights yolov3.conv.105 105 |
| | | darknet.exe partial cfg/yolov3.cfg yolov3.weights yolov3.conv.81 81 |
| | | |
| | | |
| | | darknet.exe partial cfg/yolov3-tiny.cfg yolov3-tiny.weights yolov3-tiny.conv.15 15 |
| | |
| | | // not detect small objects |
| | | //if ((w < 0.001F || h < 0.001F)) continue; |
| | | // if truth (box for object) is smaller than 1x1 pix |
| | | if ((w < lowest_w || h < lowest_h)) continue; |
| | | char buff[256]; |
| | | if (id >= classes) { |
| | | printf("\n Wrong annotation: class_id = %d. But class_id should be [from 0 to %d] \n", id, classes); |
| | | sprintf(buff, "echo %s \"Wrong annotation: class_id = %d. But class_id should be [from 0 to %d]\" >> bad_label.list", labelpath, id, classes); |
| | | system(buff); |
| | | getchar(); |
| | | continue; |
| | | } |
| | | if ((w < lowest_w || h < lowest_h)) { |
| | | //sprintf(buff, "echo %s \"Very small object: w < lowest_w OR h < lowest_h\" >> bad_label.list", labelpath); |
| | | //system(buff); |
| | | continue; |
| | | } |
| | | if (x == 999999 || y == 999999) { |
| | | printf("\n Wrong annotation: x = 0, y = 0 \n"); |
| | | sprintf(buff, "echo %s \"Wrong annotation: x = 0 or y = 0\" >> bad_label.list", labelpath); |
| | | system(buff); |
| | | continue; |
| | | } |
| | | if (x < 0 || x > 1 || y < 0 || y > 1) { |
| | | printf("\n Wrong annotation: x = %f, y = %f \n", x, y); |
| | | sprintf(buff, "echo %s \"Wrong annotation: x = %f, y = %f\" >> bad_label.list", labelpath, x, y); |
| | | system(buff); |
| | | continue; |
| | | } |
| | | if (w > 1) printf("\n Wrong annotation: w = %f \n", w), w = 1; |
| | | if (h > 1) printf("\n Wrong annotation: h = %f \n", h), h = 1; |
| | | if (w > 1) { |
| | | printf("\n Wrong annotation: w = %f \n", w); |
| | | sprintf(buff, "echo %s \"Wrong annotation: w = %f\" >> bad_label.list", labelpath, w); |
| | | system(buff); |
| | | w = 1; |
| | | } |
| | | if (h > 1) { |
| | | printf("\n Wrong annotation: h = %f \n", h); |
| | | sprintf(buff, "echo %s \"Wrong annotation: h = %f\" >> bad_label.list", labelpath, h); |
| | | system(buff); |
| | | h = 1; |
| | | } |
| | | if (x == 0) x += lowest_w; |
| | | if (y == 0) y += lowest_h; |
| | | |
| | |
| | | |
| | | #include "http_stream.h" |
| | | |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object) |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object) |
| | | { |
| | | c = c ? c : 3; |
| | | char **random_paths = get_random_paths(paths, n, m); |
| | | int i; |
| | | data d = {0}; |
| | |
| | | |
| | | d.X.rows = n; |
| | | d.X.vals = calloc(d.X.rows, sizeof(float*)); |
| | | d.X.cols = h*w*3; |
| | | d.X.cols = h*w*c; |
| | | |
| | | d.y = make_matrix(n, 5*boxes); |
| | | for(i = 0; i < n; ++i){ |
| | | const char *filename = random_paths[i]; |
| | | |
| | | int flag = 1; |
| | | int flag = (c >= 3); |
| | | IplImage *src; |
| | | if ((src = cvLoadImage(filename, flag)) == 0) |
| | | { |
| | |
| | | return d; |
| | | } |
| | | #else // OPENCV |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object) |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object) |
| | | { |
| | | c = c ? c : 3; |
| | | char **random_paths = get_random_paths(paths, n, m); |
| | | int i; |
| | | data d = { 0 }; |
| | |
| | | |
| | | d.X.rows = n; |
| | | d.X.vals = calloc(d.X.rows, sizeof(float*)); |
| | | d.X.cols = h*w * 3; |
| | | d.X.cols = h*w*c; |
| | | |
| | | d.y = make_matrix(n, 5 * boxes); |
| | | for (i = 0; i < n; ++i) { |
| | | image orig = load_image_color(random_paths[i], 0, 0); |
| | | image orig = load_image(random_paths[i], 0, 0, c); |
| | | |
| | | int oh = orig.h; |
| | | int ow = orig.w; |
| | |
| | | } else if (a.type == REGION_DATA){ |
| | | *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure); |
| | | } else if (a.type == DETECTION_DATA){ |
| | | *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.flip, a.jitter, a.hue, a.saturation, a.exposure, a.small_object); |
| | | *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.c, a.num_boxes, a.classes, a.flip, a.jitter, a.hue, a.saturation, a.exposure, a.small_object); |
| | | } else if (a.type == SWAG_DATA){ |
| | | *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter); |
| | | } else if (a.type == COMPARE_DATA){ |
| | | *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h); |
| | | } else if (a.type == IMAGE_DATA){ |
| | | *(a.im) = load_image_color(a.path, 0, 0); |
| | | *(a.im) = load_image(a.path, 0, 0, a.c); |
| | | *(a.resized) = resize_image(*(a.im), a.w, a.h); |
| | | }else if (a.type == LETTERBOX_DATA) { |
| | | *(a.im) = load_image_color(a.path, 0, 0); |
| | | *(a.im) = load_image(a.path, 0, 0, a.c); |
| | | *(a.resized) = letterbox_image(*(a.im), a.w, a.h); |
| | | } else if (a.type == TAG_DATA){ |
| | | *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.flip, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); |
| | |
| | | char **labels; |
| | | int h; |
| | | int w; |
| | | int c; // color depth |
| | | int out_w; |
| | | int out_h; |
| | | int nh; |
| | |
| | | data load_data_captcha(char **paths, int n, int m, int k, int w, int h); |
| | | data load_data_captcha_encode(char **paths, int n, int m, int w, int h); |
| | | data load_data_old(char **paths, int n, int m, char **labels, int k, int w, int h); |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object); |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int c, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object); |
| | | data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); |
| | | matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); |
| | | data load_data_super(char **paths, int n, int m, int w, int h, int scale); |
| | |
| | | void draw_detections_cv(IplImage* show_img, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes); |
| | | void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes, int ext_output); |
| | | void show_image_cv_ipl(IplImage *disp, const char *name); |
| | | image get_image_from_stream_resize(CvCapture *cap, int w, int h, IplImage** in_img, int cpp_video_capture); |
| | | image get_image_from_stream_resize(CvCapture *cap, int w, int h, int c, IplImage** in_img, int cpp_video_capture, int dont_close); |
| | | IplImage* in_img; |
| | | IplImage* det_img; |
| | | IplImage* show_img; |
| | |
| | | void *fetch_in_thread(void *ptr) |
| | | { |
| | | //in = get_image_from_stream(cap); |
| | | in_s = get_image_from_stream_resize(cap, net.w, net.h, &in_img, cpp_video_capture); |
| | | int dont_close_stream = 0; // set 1 if your IP-camera periodically turns off and turns on video-stream |
| | | in_s = get_image_from_stream_resize(cap, net.w, net.h, net.c, &in_img, cpp_video_capture, dont_close_stream); |
| | | if(!in_s.data){ |
| | | //error("Stream closed."); |
| | | printf("Stream closed.\n"); |
| | |
| | | load_args args = {0}; |
| | | args.w = net.w; |
| | | args.h = net.h; |
| | | args.c = net.c; |
| | | args.paths = paths; |
| | | args.n = imgs; |
| | | args.m = plist->size; |
| | |
| | | args.hue = net.hue; |
| | | |
| | | #ifdef OPENCV |
| | | args.threads = 3; |
| | | args.threads = 3 * ngpus; |
| | | IplImage* img = NULL; |
| | | float max_img_loss = 5; |
| | | int number_of_lines = 100; |
| | |
| | | load_args args = { 0 }; |
| | | args.w = net.w; |
| | | args.h = net.h; |
| | | args.c = net.c; |
| | | args.type = IMAGE_DATA; |
| | | //args.type = LETTERBOX_DATA; |
| | | |
| | |
| | | |
| | | for (i = 0; i < m; ++i) { |
| | | char *path = paths[i]; |
| | | image orig = load_image_color(path, 0, 0); |
| | | image orig = load_image(path, 0, 0, net.c); |
| | | image sized = resize_image(orig, net.w, net.h); |
| | | char *id = basecfg(path); |
| | | network_predict(net, sized.data); |
| | |
| | | load_args args = { 0 }; |
| | | args.w = net.w; |
| | | args.h = net.h; |
| | | args.c = net.c; |
| | | args.type = IMAGE_DATA; |
| | | //args.type = LETTERBOX_DATA; |
| | | |
| | |
| | | if(!input) return; |
| | | strtok(input, "\n"); |
| | | } |
| | | image im = load_image_color(input,0,0); |
| | | image im = load_image(input,0,0,net.c); |
| | | int letterbox = 0; |
| | | //image sized = resize_image(im, net.w, net.h); |
| | | image sized = letterbox_image(im, net.w, net.h); letterbox = 1; |
| | | image sized = resize_image(im, net.w, net.h); |
| | | //image sized = letterbox_image(im, net.w, net.h); letterbox = 1; |
| | | layer l = net.layers[net.n-1]; |
| | | |
| | | //box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); |
| | |
| | | using std::endl; |
| | | |
| | | #include "opencv2/opencv.hpp" |
| | | #include "opencv2/highgui.hpp" |
| | | #include "opencv2/highgui/highgui.hpp" |
| | | #include "opencv2/highgui/highgui_c.h" |
| | | #include "opencv2/imgproc/imgproc_c.h" |
| | | #ifndef CV_VERSION_EPOCH |
| | |
| | | |
| | | // HSV augmentation |
| | | // CV_BGR2HSV, CV_RGB2HSV, CV_HSV2BGR, CV_HSV2RGB |
| | | if (ipl->nChannels >= 3) |
| | | { |
| | | cv::Mat hsv_src; |
| | | cvtColor(sized, hsv_src, CV_BGR2HSV); // also BGR -> RGB |
| | | |
| | |
| | | cv::merge(hsv, hsv_src); |
| | | |
| | | cvtColor(hsv_src, sized, CV_HSV2RGB); // now RGB instead of BGR |
| | | } |
| | | else |
| | | { |
| | | sized *= dexp; |
| | | } |
| | | |
| | | // Mat -> IplImage -> image |
| | | IplImage src = sized; |
| | |
| | | { |
| | | IplImage* src = 0; |
| | | int flag = -1; |
| | | if (channels == 0) flag = -1; |
| | | if (channels == 0) flag = 1; |
| | | else if (channels == 1) flag = 0; |
| | | else if (channels == 3) flag = 1; |
| | | else { |
| | |
| | | } |
| | | image out = ipl_to_image(src); |
| | | cvReleaseImage(&src); |
| | | if (out.c > 1) |
| | | rgbgr_image(out); |
| | | return out; |
| | | } |
| | |
| | | return im; |
| | | } |
| | | |
| | | image get_image_from_stream_resize(CvCapture *cap, int w, int h, IplImage** in_img, int cpp_video_capture) |
| | | image get_image_from_stream_resize(CvCapture *cap, int w, int h, int c, IplImage** in_img, int cpp_video_capture, int dont_close) |
| | | { |
| | | c = c ? c : 3; |
| | | IplImage* src; |
| | | if (cpp_video_capture) { |
| | | static int once = 1; |
| | |
| | | } |
| | | else src = cvQueryFrame(cap); |
| | | |
| | | if (!src) return make_empty_image(0, 0, 0); |
| | | if (src->width < 1 || src->height < 1 || src->nChannels < 1) return make_empty_image(0, 0, 0); |
| | | IplImage* new_img = cvCreateImage(cvSize(w, h), IPL_DEPTH_8U, 3); |
| | | *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, 3); |
| | | if (!src) { |
| | | if (dont_close) src = cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, c); |
| | | else return make_empty_image(0, 0, 0); |
| | | } |
| | | if (src->width < 1 || src->height < 1 || src->nChannels < 1) { |
| | | if (cpp_video_capture) cvReleaseImage(&src); |
| | | if (dont_close) src = cvCreateImage(cvSize(416, 416), IPL_DEPTH_8U, c); |
| | | else return make_empty_image(0, 0, 0); |
| | | } |
| | | IplImage* new_img = cvCreateImage(cvSize(w, h), IPL_DEPTH_8U, c); |
| | | *in_img = cvCreateImage(cvSize(src->width, src->height), IPL_DEPTH_8U, c); |
| | | cvResize(src, *in_img, CV_INTER_LINEAR); |
| | | cvResize(src, new_img, CV_INTER_LINEAR); |
| | | image im = ipl_to_image(new_img); |
| | | cvReleaseImage(&new_img); |
| | | if (cpp_video_capture) cvReleaseImage(&src); |
| | | if (c>1) |
| | | rgbgr_image(im); |
| | | return im; |
| | | } |
| | |
| | | |
| | | void distort_image(image im, float hue, float sat, float val) |
| | | { |
| | | if (im.c >= 3) |
| | | { |
| | | rgb_to_hsv(im); |
| | | scale_image_channel(im, 1, sat); |
| | | scale_image_channel(im, 2, val); |
| | |
| | | if (im.data[i] < 0) im.data[i] += 1; |
| | | } |
| | | hsv_to_rgb(im); |
| | | } |
| | | else |
| | | { |
| | | scale_image_channel(im, 0, val); |
| | | } |
| | | constrain_image(im); |
| | | } |
| | | |
| | |
| | | box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); |
| | | float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); |
| | | int i, j; |
| | | for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); |
| | | for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float)); |
| | | get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map); |
| | | for (j = 0; j < l.w*l.h*l.n; ++j) { |
| | | dets[j].classes = l.classes; |
| | |
| | | |
| | | int get_network_nuisance(network net); |
| | | int get_network_background(network net); |
| | | void fuse_conv_batchnorm(network net); |
| | | YOLODLL_API void fuse_conv_batchnorm(network net); |
| | | |
| | | #ifdef __cplusplus |
| | | } |
| | |
| | | |
| | | char *a = option_find_str(options, "mask", 0); |
| | | int *mask = parse_yolo_mask(a, &num); |
| | | int max_boxes = option_find_int_quiet(options, "max", 30); |
| | | int max_boxes = option_find_int_quiet(options, "max", 90); |
| | | layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes); |
| | | if (l.outputs != params.inputs) { |
| | | printf("Error: l.outputs == params.inputs \n"); |
| | |
| | | int coords = option_find_int(options, "coords", 4); |
| | | int classes = option_find_int(options, "classes", 20); |
| | | int num = option_find_int(options, "num", 1); |
| | | int max_boxes = option_find_int_quiet(options, "max", 30); |
| | | int max_boxes = option_find_int_quiet(options, "max", 90); |
| | | |
| | | layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes); |
| | | if (l.outputs != params.inputs) { |
| | |
| | | box truth = float_to_box(state.truth + t*5 + b*l.truths); |
| | | int class_id = state.truth[t * 5 + b*l.truths + 4]; |
| | | if (class_id >= l.classes) { |
| | | printf("Warning: in txt-labels class_id=%d >= classes=%d in cfg-file\n", class_id, l.classes); |
| | | printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes-1); |
| | | getchar(); |
| | | continue; // if label contains class_id more than number of classes in the cfg-file |
| | | } |
| | | |
| | |
| | | //find_replace(output_path, "JPEGImages", "labels", output_path); // PascalVOC |
| | | find_replace(output_path, "VOC2007/JPEGImages", "VOC2007/labels", output_path); // PascalVOC |
| | | find_replace(output_path, "VOC2012/JPEGImages", "VOC2012/labels", output_path); // PascalVOC |
| | | |
| | | //find_replace(output_path, "/raw/", "/labels/", output_path); |
| | | |
| | | // replace only ext of files |
| | | find_replace_extension(output_path, ".jpg", ".txt", output_path); |
| | | find_replace_extension(output_path, ".JPG", ".txt", output_path); // error |
| | |
| | | box truth = float_to_box_stride(state.truth + t*(4 + 1) + b*l.truths, 1); |
| | | int class_id = state.truth[t*(4 + 1) + b*l.truths + 4]; |
| | | if (class_id >= l.classes) { |
| | | printf("Warning: in txt-labels class_id=%d >= classes=%d in cfg-file\n", class_id, l.classes); |
| | | printf(" Warning: in txt-labels class_id=%d >= classes=%d in cfg-file. In txt-labels class_id should be [from 0 to %d] \n", class_id, l.classes, l.classes - 1); |
| | | getchar(); |
| | | continue; // if label contains class_id more than number of classes in the cfg-file |
| | | } |
| | | if(!truth.x) break; |