From b8e6e80c6d411d05a9e09f1e3676eb9a7f3ea0e8 Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Fri, 03 Aug 2018 11:35:03 +0000
Subject: [PATCH] Added spatial Yolo v3 yolov3-spp.cfg

---
 src/yolo.c |   95 ++++++++++++++++-------------------------------
 1 files changed, 32 insertions(+), 63 deletions(-)

diff --git a/src/yolo.c b/src/yolo.c
index fefa763..4d344f0 100644
--- a/src/yolo.c
+++ b/src/yolo.c
@@ -8,17 +8,20 @@
 
 #ifdef OPENCV
 #include "opencv2/highgui/highgui_c.h"
+#include "opencv2/imgproc/imgproc_c.h"
+#include "opencv2/core/version.hpp"
+#ifndef CV_VERSION_EPOCH
+#include "opencv2/videoio/videoio_c.h"
+#endif
 #endif
 
 char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
-image voc_labels[20];
 
 void train_yolo(char *cfgfile, char *weightfile)
 {
     char *train_images = "/data/voc/train.txt";
     char *backup_directory = "/home/pjreddie/backup/";
     srand(time(0));
-    data_seed = time(0);
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
     float avg_loss = -1;
@@ -89,34 +92,6 @@
     save_weights(net, buff);
 }
 
-void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
-{
-    int i,j,n;
-    //int per_cell = 5*num+classes;
-    for (i = 0; i < side*side; ++i){
-        int row = i / side;
-        int col = i % side;
-        for(n = 0; n < num; ++n){
-            int index = i*num + n;
-            int p_index = side*side*classes + i*num + n;
-            float scale = predictions[p_index];
-            int box_index = side*side*(classes + num) + (i*num + n)*4;
-            boxes[index].x = (predictions[box_index + 0] + col) / side * w;
-            boxes[index].y = (predictions[box_index + 1] + row) / side * h;
-            boxes[index].w = pow(predictions[box_index + 2], (square?2:1)) * w;
-            boxes[index].h = pow(predictions[box_index + 3], (square?2:1)) * h;
-            for(j = 0; j < classes; ++j){
-                int class_index = i*classes;
-                float prob = scale*predictions[class_index+j];
-                probs[index][j] = (prob > thresh) ? prob : 0;
-            }
-            if(only_objectness){
-                probs[index][0] = scale;
-            }
-        }
-    }
-}
-
 void print_yolo_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h)
 {
     int i, j;
@@ -156,8 +131,6 @@
 
     layer l = net.layers[net.n-1];
     int classes = l.classes;
-    int square = l.sqrt;
-    int side = l.side;
 
     int j;
     FILE **fps = calloc(classes, sizeof(FILE *));
@@ -166,9 +139,9 @@
         snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
         fps[j] = fopen(buff, "w");
     }
-    box *boxes = calloc(side*side*l.n, sizeof(box));
-    float **probs = calloc(side*side*l.n, sizeof(float *));
-    for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
+    box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
+    float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
+    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
 
     int m = plist->size;
     int i=0;
@@ -178,7 +151,7 @@
     int nms = 1;
     float iou_thresh = .5;
 
-    int nthreads = 2;
+    int nthreads = 8;
     image *val = calloc(nthreads, sizeof(image));
     image *val_resized = calloc(nthreads, sizeof(image));
     image *buf = calloc(nthreads, sizeof(image));
@@ -214,12 +187,12 @@
             char *path = paths[i+t-nthreads];
             char *id = basecfg(path);
             float *X = val_resized[t].data;
-            float *predictions = network_predict(net, X);
+            network_predict(net, X);
             int w = val[t].w;
             int h = val[t].h;
-            convert_detections(predictions, classes, l.n, square, side, w, h, thresh, probs, boxes, 0);
-            if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, iou_thresh);
-            print_yolo_detections(fps, id, boxes, probs, side*side*l.n, classes, w, h);
+            get_detection_boxes(l, w, h, thresh, probs, boxes, 0);
+            if (nms) do_nms_sort_v2(boxes, probs, l.side*l.side*l.n, classes, iou_thresh);
+            print_yolo_detections(fps, id, boxes, probs, l.side*l.side*l.n, classes, w, h);
             free(id);
             free_image(val[t]);
             free_image(val_resized[t]);
@@ -244,7 +217,6 @@
 
     layer l = net.layers[net.n-1];
     int classes = l.classes;
-    int square = l.sqrt;
     int side = l.side;
 
     int j, k;
@@ -275,14 +247,12 @@
         image orig = load_image_color(path, 0, 0);
         image sized = resize_image(orig, net.w, net.h);
         char *id = basecfg(path);
-        float *predictions = network_predict(net, sized.data);
-        convert_detections(predictions, classes, l.n, square, side, 1, 1, thresh, probs, boxes, 1);
+        network_predict(net, sized.data);
+        get_detection_boxes(l, orig.w, orig.h, thresh, probs, boxes, 1);
         if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms);
 
-        char *labelpath = find_replace(path, "images", "labels");
-        labelpath = find_replace(labelpath, "JPEGImages", "labels");
-        labelpath = find_replace(labelpath, ".jpg", ".txt");
-        labelpath = find_replace(labelpath, ".JPEG", ".txt");
+        char labelpath[4096];
+		replace_image_to_label(path, labelpath);
 
         int num_labels = 0;
         box_label *truth = read_boxes(labelpath, &num_labels);
@@ -316,7 +286,7 @@
 
 void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
 {
-
+    image **alphabet = load_alphabet();
     network net = parse_network_cfg(cfgfile);
     if(weightfile){
         load_weights(&net, weightfile);
@@ -328,7 +298,7 @@
     char buff[256];
     char *input = buff;
     int j;
-    float nms=.5;
+    float nms=.4;
     box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
     float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
     for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
@@ -346,16 +316,15 @@
         image sized = resize_image(im, net.w, net.h);
         float *X = sized.data;
         time=clock();
-        float *predictions = network_predict(net, X);
+        network_predict(net, X);
         printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
-        convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
-        if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
-        //draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
-        draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
+        get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0);
+        if (nms) do_nms_sort_v2(boxes, probs, l.side*l.side*l.n, l.classes, nms);
+        //draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20);
+        draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20);
         save_image(im, "predictions");
         show_image(im, "predictions");
 
-        show_image(sized, "resized");
         free_image(im);
         free_image(sized);
 #ifdef OPENCV
@@ -368,16 +337,15 @@
 
 void run_yolo(int argc, char **argv)
 {
-    int i;
-    for(i = 0; i < 20; ++i){
-        char buff[256];
-        sprintf(buff, "data/labels/%s.png", voc_names[i]);
-        voc_labels[i] = load_image_color(buff, 0, 0);
-    }
-
+	int dont_show = find_arg(argc, argv, "-dont_show");
+	int http_stream_port = find_int_arg(argc, argv, "-http_port", -1);
+	char *out_filename = find_char_arg(argc, argv, "-out_filename", 0);
+    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
     float thresh = find_float_arg(argc, argv, "-thresh", .2);
+	float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
     int cam_index = find_int_arg(argc, argv, "-c", 0);
     int frame_skip = find_int_arg(argc, argv, "-s", 0);
+	int ext_output = find_arg(argc, argv, "-ext_output");
     if(argc < 4){
         fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
         return;
@@ -390,5 +358,6 @@
     else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights);
     else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights);
     else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights);
-    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, voc_labels, 20, frame_skip);
+    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, hier_thresh, cam_index, filename, voc_names, 20, frame_skip,
+		prefix, out_filename, http_stream_port, dont_show, ext_output);
 }

--
Gitblit v1.10.0