From 65bff2683bdffe7ec82eacd8144c70c09d19c88d Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Fri, 16 Feb 2018 20:55:37 +0000
Subject: [PATCH] Take the "difficult" flag into account when calculating mAP for Pascal VOC
---
build/darknet/x64/data/voc.data | 1
scripts/voc_label_difficult.py | 56 ++++++++++++++
build/darknet/x64/calc_mAP_voc_py.cmd | 4
src/blas.c | 82 ++++++++++++++++++-
src/detector.c | 46 ++++++++++-
src/softmax_layer.c | 4
src/blas.h | 2
src/region_layer.c | 2
src/detection_layer.c | 2
9 files changed, 179 insertions(+), 20 deletions(-)
diff --git a/build/darknet/x64/calc_mAP_voc_py.cmd b/build/darknet/x64/calc_mAP_voc_py.cmd
index 0267600..8c5ba3c 100644
--- a/build/darknet/x64/calc_mAP_voc_py.cmd
+++ b/build/darknet/x64/calc_mAP_voc_py.cmd
@@ -3,9 +3,9 @@
rem C:\Users\Alex\AppData\Local\Programs\Python\Python36\Scripts\pip install _pickle
-darknet.exe detector valid data/voc.data tiny-yolo-voc.cfg tiny-yolo-voc.weights
+rem darknet.exe detector valid data/voc.data tiny-yolo-voc.cfg tiny-yolo-voc.weights
-rem darknet.exe detector valid data/voc.data yolo-voc.cfg yolo-voc.weights
+darknet.exe detector valid data/voc.data yolo-voc.cfg yolo-voc.weights
reval_voc_py3.py --year 2007 --classes data\voc.names --image_set test --voc_dir E:\VOC2007_2012\VOCtrainval_11-May-2012\VOCdevkit results
diff --git a/build/darknet/x64/data/voc.data b/build/darknet/x64/data/voc.data
index 0a8524c..63d1a91 100644
--- a/build/darknet/x64/data/voc.data
+++ b/build/darknet/x64/data/voc.data
@@ -1,6 +1,7 @@
classes= 20
train = data/voc/train.txt
valid = data/voc/2007_test.txt
+#difficult = data/voc/difficult_2007_test.txt
names = data/voc.names
backup = backup/
diff --git a/scripts/voc_label_difficult.py b/scripts/voc_label_difficult.py
new file mode 100644
index 0000000..93e57fe
--- /dev/null
+++ b/scripts/voc_label_difficult.py
@@ -0,0 +1,56 @@
+import xml.etree.ElementTree as ET
+import pickle
+import os
+from os import listdir, getcwd
+from os.path import join
+
+# (year, image set) pairs whose annotations are scanned by this script.
+sets = [
+    ('2012', 'val'),
+    ('2007', 'test'),
+]
+
+# Class names; the class id written to a label file is the index in this list.
+classes = [
+    "aeroplane", "bicycle", "bird", "boat", "bottle",
+    "bus", "car", "cat", "chair", "cow",
+    "diningtable", "dog", "horse", "motorbike", "person",
+    "pottedplant", "sheep", "sofa", "train", "tvmonitor",
+]
+
+
+def convert(size, box):
+    """Convert a corner-style box into centre/size values scaled by image size.
+
+    size -- (width, height) of the image.
+    box  -- (xmin, xmax, ymin, ymax); note that both x values come first.
+
+    Returns (x_center, y_center, width, height), where the x values are
+    multiplied by 1/size[0] and the y values by 1/size[1].
+    """
+    scale_x = 1. / size[0]
+    scale_y = 1. / size[1]
+    center_x = (box[0] + box[1]) / 2.0
+    center_y = (box[2] + box[3]) / 2.0
+    span_x = box[1] - box[0]
+    span_y = box[3] - box[2]
+    return (center_x * scale_x, center_y * scale_y, span_x * scale_x, span_y * scale_y)
+
+def convert_annotation(year, image_id):
+    """Write the "difficult" objects of one annotation file as a label file.
+
+    Reads VOCdevkit/VOC<year>/Annotations/<image_id>.xml and writes
+    VOCdevkit/VOC<year>/labels/difficult_<image_id>.txt. Each output line is
+    "<class_id> <x> <y> <w> <h>", where the four box values come from
+    convert(). Objects whose name is not in `classes`, or whose <difficult>
+    value is 0, are skipped, so the output file may be empty.
+    """
+    annotation_path = 'VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)
+    label_path = 'VOCdevkit/VOC%s/labels/difficult_%s.txt' % (year, image_id)
+
+    # Context managers close both handles even when parsing raises; without
+    # them one pair of open files is leaked per image.
+    with open(annotation_path) as in_file, open(label_path, 'w') as out_file:
+        root = ET.parse(in_file).getroot()
+        size = root.find('size')
+        w = int(size.find('width').text)
+        h = int(size.find('height').text)
+
+        for obj in root.iter('object'):
+            # A missing or empty <difficult> tag is treated as "not difficult"
+            # instead of failing on a None element.
+            difficult = obj.findtext('difficult') or '0'
+            cls = obj.find('name').text
+            if cls not in classes or int(difficult) == 0:
+                continue
+            cls_id = classes.index(cls)
+            xmlbox = obj.find('bndbox')
+            # convert() expects the order (xmin, xmax, ymin, ymax).
+            b = tuple(float(xmlbox.find(tag).text) for tag in ('xmin', 'xmax', 'ymin', 'ymax'))
+            bb = convert((w, h), b)
+            out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
+
+wd = getcwd()
+
+# For every (year, image set): make sure the labels directory exists, write one
+# label file per image via convert_annotation(), and write a list file
+# difficult_<year>_<image_set>.txt in the current directory.
+for year, image_set in sets:
+    labels_dir = 'VOCdevkit/VOC%s/labels/' % (year)
+    if not os.path.exists(labels_dir):
+        os.makedirs(labels_dir)
+
+    # Read the ids inside a context manager so the handle is closed promptly.
+    with open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % (year, image_set)) as ids_file:
+        image_ids = ids_file.read().strip().split()
+
+    # The listed "difficult_<id>.jpg" paths are not real images: the detector
+    # derives a label path from each entry by replacing "JPEGImages" with
+    # "labels" and ".jpg" with ".txt", which gives the difficult_<id>.txt file
+    # written by convert_annotation().
+    with open('difficult_%s_%s.txt' % (year, image_set), 'w') as list_file:
+        for image_id in image_ids:
+            list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/difficult_%s.jpg\n' % (wd, year, image_id))
+            convert_annotation(year, image_id)
+
diff --git a/src/blas.c b/src/blas.c
index 31bd86b..cb6501f 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -1,5 +1,6 @@
#include "blas.h"
-#include "math.h"
+
+#include <math.h>
#include <assert.h>
#include <float.h>
#include <stdio.h>
@@ -54,6 +55,16 @@
}
}
+/*
+ * Accumulate three gradients from the incoming gradient dc, for each of the
+ * n elements:
+ *   da[i] += dc[i] * s[i]          (skipped when da is NULL)
+ *   db[i] += dc[i] * (1 - s[i])    (skipped when db is NULL)
+ *   ds[i] += dc[i] * (a[i] - b[i])
+ * a, b, s, ds and dc are always dereferenced.
+ * NOTE(review): presumably the backward pass of weighted_sum_cpu - confirm.
+ */
+void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc)
+{
+    int idx;
+    for (idx = 0; idx < n; ++idx) {
+        if (da) {
+            da[idx] += dc[idx] * s[idx];
+        }
+        if (db) {
+            db[idx] += dc[idx] * (1 - s[idx]);
+        }
+        ds[idx] += dc[idx] * (a[idx] - b[idx]);
+    }
+}
+
void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
{
int stride = w1/w2;
@@ -161,12 +172,48 @@
for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
}
+/*
+ * Add the contents of an interleaved buffer back into X and Y.
+ * OUT is read sequentially: for each of the B batches it supplies NX values
+ * followed by NY values. The NX values are added to X[j*NX + i] and the NY
+ * values to Y[j*NY + i]. A NULL X or Y is skipped, but its positions in OUT
+ * are still stepped over.
+ */
+void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+{
+    int b, k;
+    int pos = 0;
+    for (b = 0; b < B; ++b) {
+        for (k = 0; k < NX; ++k, ++pos) {
+            if (X) X[b*NX + k] += OUT[pos];
+        }
+        for (k = 0; k < NY; ++k, ++pos) {
+            if (Y) Y[b*NY + k] += OUT[pos];
+        }
+    }
+}
+
+/*
+ * Interleave X and Y into OUT batch by batch: for each of the B batches,
+ * copy NX values from X[j*NX + i] and then NY values from Y[j*NY + i],
+ * writing OUT sequentially. Unlike deinter_cpu, X and Y are not NULL-checked.
+ */
+void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+{
+    int b, k;
+    int pos = 0;
+    for (b = 0; b < B; ++b) {
+        for (k = 0; k < NX; ++k) {
+            OUT[pos] = X[b*NX + k];
+            ++pos;
+        }
+        for (k = 0; k < NY; ++k) {
+            OUT[pos] = Y[b*NY + k];
+            ++pos;
+        }
+    }
+}
+
void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
{
int i;
for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX];
}
+/* Element-wise multiply-accumulate: Z[i] += X[i] * Y[i] for i in [0, N). */
+void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
+{
+    int k = 0;
+    while (k < N) {
+        Z[k] += X[k] * Y[k];
+        ++k;
+    }
+}
+
void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
int i;
@@ -179,11 +226,21 @@
}
else {
error[i] = 2*abs_val - 1;
- delta[i] = (diff < 0) ? -1 : 1;
+ delta[i] = (diff < 0) ? 1 : -1;
}
}
}
+/*
+ * Per-element L1 error and its sign.
+ *   error[i] = |truth[i] - pred[i]|
+ *   delta[i] = 1 when truth[i] > pred[i], otherwise -1 (also when equal)
+ */
+void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int k;
+    for (k = 0; k < n; ++k) {
+        const float gap = truth[k] - pred[k];
+        error[k] = fabs(gap);
+        if (gap > 0) {
+            delta[k] = 1;
+        } else {
+            delta[k] = -1;
+        }
+    }
+}
+
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
{
int i;
@@ -202,21 +259,32 @@
return dot;
}
-void softmax(float *input, int n, float temp, float *output)
+/*
+ * Softmax with temperature over n elements spaced `stride` floats apart.
+ * Reads input[i*stride] and writes output[i*stride] for i in [0, n).
+ * The largest input is subtracted before exponentiation, which keeps the
+ * exponent non-positive when temp > 0; each result is then divided by the
+ * sum of the exponentials.
+ */
+void softmax(float *input, int n, float temp, float *output, int stride)
{
int i;
float sum = 0;
float largest = -FLT_MAX;
for(i = 0; i < n; ++i){
- if(input[i] > largest) largest = input[i];
+ if(input[i*stride] > largest) largest = input[i*stride];
}
for(i = 0; i < n; ++i){
- float e = exp(input[i]/temp - largest/temp);
+ float e = exp(input[i*stride]/temp - largest/temp);
sum += e;
- output[i] = e;
+ output[i*stride] = e;
}
for(i = 0; i < n; ++i){
- output[i] /= sum;
+ output[i*stride] /= sum;
+ }
+}
+
+
+/*
+ * Run softmax() once per group for every batch item.
+ * Group g of batch b starts at offset b*batch_offset + g*group_offset in
+ * both input and output; n, temp and stride are passed through to softmax().
+ */
+void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
+{
+ int g, b;
+ for(b = 0; b < batch; ++b){
+ for(g = 0; g < groups; ++g){
+ softmax(input + b*batch_offset + g*group_offset, n, temp, output + b*batch_offset + g*group_offset, stride);
+ }
}
}
diff --git a/src/blas.h b/src/blas.h
index 3d6ee7d..a5b82ec 100644
--- a/src/blas.h
+++ b/src/blas.h
@@ -35,7 +35,7 @@
void l2_cpu(int n, float *pred, float *truth, float *delta, float *error);
void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
-void softmax(float *input, int n, float temp, float *output);
+void softmax(float *input, int n, float temp, float *output, int stride);
#ifdef GPU
#include "cuda.h"
diff --git a/src/detection_layer.c b/src/detection_layer.c
index cd98b4b..fd5a419 100644
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@@ -59,7 +59,7 @@
for (i = 0; i < locations; ++i) {
int offset = i*l.classes;
softmax(l.output + index + offset, l.classes, 1,
- l.output + index + offset);
+ l.output + index + offset, 1);
}
}
}
diff --git a/src/detector.c b/src/detector.c
index 3111c19..ce259fd 100644
--- a/src/detector.c
+++ b/src/detector.c
@@ -499,9 +499,9 @@
{
int j;
list *options = read_data_cfg(datacfg);
- char *valid_images = option_find_str(options, "valid", "data/train.list");
+ char *valid_images = option_find_str(options, "valid", "data/train.txt");
+ char *difficult_valid_images = option_find_str(options, "difficult", NULL);
char *name_list = option_find_str(options, "names", "data/names.list");
- //char *prefix = option_find_str(options, "results", "results");
char **names = get_labels(name_list);
char *mapf = option_find_str(options, "map", 0);
int *map = 0;
@@ -515,10 +515,16 @@
fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
srand(time(0));
- char *base = "comp4_det_test_";
list *plist = get_paths(valid_images);
char **paths = (char **)list_to_array(plist);
+ char **paths_dif = NULL;
+ if (difficult_valid_images) {
+ list *plist_dif = get_paths(difficult_valid_images);
+ paths_dif = (char **)list_to_array(plist_dif);
+ }
+
+
layer l = net.layers[net.n - 1];
int classes = l.classes;
@@ -574,7 +580,7 @@
}
for (t = 0; t < nthreads && i + t - nthreads < m; ++t) {
const int image_index = i + t - nthreads;
- char *path = paths[i + t - nthreads];
+ char *path = paths[image_index];
char *id = basecfg(path);
float *X = val_resized[t].data;
network_predict(net, X);
@@ -594,6 +600,22 @@
truth_classes_count[truth[j].id]++;
}
+ // difficult
+ box_label *truth_dif = NULL;
+ int num_labels_dif = 0;
+ if (paths_dif)
+ {
+ char *path_dif = paths_dif[image_index];
+
+ char labelpath_dif[4096];
+ find_replace(path_dif, "images", "labels", labelpath_dif);
+ find_replace(labelpath_dif, "JPEGImages", "labels", labelpath_dif);
+ find_replace(labelpath_dif, ".jpg", ".txt", labelpath_dif);
+ find_replace(labelpath_dif, ".JPEG", ".txt", labelpath_dif);
+ find_replace(labelpath_dif, ".png", ".txt", labelpath_dif);
+ truth_dif = read_boxes(labelpath_dif, &num_labels_dif);
+ }
+
for (i = 0; i < (l.w*l.h*l.n); ++i) {
int class_id;
@@ -606,6 +628,8 @@
detections[detections_count - 1].p = prob;
detections[detections_count - 1].image_index = image_index;
detections[detections_count - 1].class_id = class_id;
+ detections[detections_count - 1].truth_flag = 0;
+ detections[detections_count - 1].unique_truth_index = -1;
int truth_index = -1;
float max_iou = 0;
@@ -617,16 +641,27 @@
float current_iou = box_iou(boxes[i], t);
if (current_iou > iou_thresh && class_id == truth[j].id) {
if (current_iou > max_iou) {
- current_iou = max_iou;
+ max_iou = current_iou;
truth_index = unique_truth_index + j;
}
}
}
+
// best IoU
if (truth_index > -1) {
detections[detections_count - 1].truth_flag = 1;
detections[detections_count - 1].unique_truth_index = truth_index;
}
+ else {
+     // The detection matched no regular ground-truth box. If it overlaps a
+     // "difficult" box of the same class, remove it from the detections
+     // list by dropping the entry that was just appended.
+     for (j = 0; j < num_labels_dif; ++j) {
+         box t = { truth_dif[j].x, truth_dif[j].y, truth_dif[j].w, truth_dif[j].h };
+         float current_iou = box_iou(boxes[i], t);
+         if (current_iou > iou_thresh && class_id == truth_dif[j].id) {
+             --detections_count;
+             // Stop at the first match: otherwise a detection overlapping
+             // several difficult boxes decrements the counter more than
+             // once and also discards earlier, unrelated detections.
+             break;
+         }
+     }
+ }
}
}
}
@@ -685,7 +720,6 @@
pr[d.class_id][rank].fp++; // false-positive
}
-
for (i = 0; i < classes; ++i)
{
const int tp = pr[i][rank].tp;
diff --git a/src/region_layer.c b/src/region_layer.c
index f7eaef6..d48e8d0 100644
--- a/src/region_layer.c
+++ b/src/region_layer.c
@@ -170,7 +170,7 @@
for (b = 0; b < l.batch; ++b){
for(i = 0; i < l.h*l.w*l.n; ++i){
int index = size*i + b*l.outputs;
- softmax(l.output + index + 5, l.classes, 1, l.output + index + 5);
+ softmax(l.output + index + 5, l.classes, 1, l.output + index + 5, 1);
}
}
}
diff --git a/src/softmax_layer.c b/src/softmax_layer.c
index 5d15314..27f73fd 100644
--- a/src/softmax_layer.c
+++ b/src/softmax_layer.c
@@ -40,7 +40,7 @@
int count = 0;
for(i = 0; i < hierarchy->groups; ++i){
int group_size = hierarchy->group_size[i];
- softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count);
+ softmax(input+b*inputs + count, group_size, temp, output+b*inputs + count, 1);
count += group_size;
}
}
@@ -55,7 +55,7 @@
softmax_tree(state.input, batch, inputs, l.temperature, l.softmax_tree, l.output);
} else {
for(b = 0; b < batch; ++b){
- softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs);
+ softmax(state.input+b*inputs, inputs, l.temperature, l.output+b*inputs, 1);
}
}
}
--
Gitblit v1.10.0