From 1b5afb45838e603fa6780762eb8cc59246dc2d81 Mon Sep 17 00:00:00 2001
From: IlyaOvodov <b@ovdv.ru>
Date: Tue, 08 May 2018 11:09:35 +0000
Subject: [PATCH] Output improvements for detector results: When printing detector results, output was done in random order, making the results hard to interpret. Now: 1. Text output includes coordinates of rects (left, right, top, bottom in pixels) along with label and score 2. Text output is sorted by rect left edges to simplify finding the appropriate rects on the image 3. If several class probs are > thresh for some detection, the most probable is written first and coordinates for the others are not repeated 4. Rects are drawn on the image in order of their best class prob, so the most probable rects are always on top and not overlaid by less probable ones 5. The most probable label for a rect is always written first Also: 6. The message about low GPU memory includes the required amount
---
src/data.c | 270 +++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 206 insertions(+), 64 deletions(-)
diff --git a/src/data.c b/src/data.c
index 20d5748..3b014b4 100644
--- a/src/data.c
+++ b/src/data.c
@@ -29,7 +29,7 @@
int i;
pthread_mutex_lock(&mutex);
for(i = 0; i < n; ++i){
- int index = rand()%m;
+ int index = random_gen()%m;
indexes[i] = index;
random_paths[i] = paths[index];
if(i == 0) printf("%s\n", paths[index]);
@@ -44,10 +44,12 @@
char **random_paths = calloc(n, sizeof(char*));
int i;
pthread_mutex_lock(&mutex);
- for(i = 0; i < n; ++i){
- int index = rand()%m;
+ //printf("n = %d \n", n);
+ for(i = 0; i < n; ++i){
+ int index = random_gen() % m;
random_paths[i] = paths[index];
//if(i == 0) printf("%s\n", paths[index]);
+ //printf("grp: %s\n", paths[index]);
}
pthread_mutex_unlock(&mutex);
return random_paths;
@@ -102,7 +104,7 @@
return X;
}
-matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
+matrix load_image_augment_paths(char **paths, int n, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
{
int i;
matrix X;
@@ -113,8 +115,9 @@
for(i = 0; i < n; ++i){
image im = load_image_color(paths[i], 0, 0);
image crop = random_augment_image(im, angle, aspect, min, max, size);
- int flip = rand()%2;
- if (flip) flip_image(crop);
+ int flip = use_flip ? random_gen() % 2 : 0;
+ if (flip)
+ flip_image(crop);
random_distort_image(crop, hue, saturation, exposure);
/*
@@ -161,7 +164,7 @@
int i;
for(i = 0; i < n; ++i){
box_label swap = b[i];
- int index = rand()%n;
+ int index = random_gen()%n;
b[i] = b[index];
b[index] = swap;
}
@@ -171,6 +174,13 @@
{
int i;
for(i = 0; i < n; ++i){
+ if(boxes[i].x == 0 && boxes[i].y == 0) {
+ boxes[i].x = 999999;
+ boxes[i].y = 999999;
+ boxes[i].w = 999999;
+ boxes[i].h = 999999;
+ continue;
+ }
boxes[i].left = boxes[i].left * sx - dx;
boxes[i].right = boxes[i].right * sx - dx;
boxes[i].top = boxes[i].top * sy - dy;
@@ -260,7 +270,7 @@
h = boxes[i].h;
id = boxes[i].id;
- if (w < .01 || h < .01) continue;
+ if (w < .001 || h < .001) continue;
int col = (int)(x*num_boxes);
int row = (int)(y*num_boxes);
@@ -283,24 +293,35 @@
free(boxes);
}
-void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy)
+void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy,
+ int small_object, int net_w, int net_h)
{
char labelpath[4096];
find_replace(path, "images", "labels", labelpath);
find_replace(labelpath, "JPEGImages", "labels", labelpath);
+ find_replace(labelpath, "raw", "labels", labelpath);
find_replace(labelpath, ".jpg", ".txt", labelpath);
find_replace(labelpath, ".png", ".txt", labelpath);
+ find_replace(labelpath, ".bmp", ".txt", labelpath);
find_replace(labelpath, ".JPG", ".txt", labelpath);
find_replace(labelpath, ".JPEG", ".txt", labelpath);
int count = 0;
+ int i;
box_label *boxes = read_boxes(labelpath, &count);
+ float lowest_w = 1.F / net_w;
+ float lowest_h = 1.F / net_h;
+ if (small_object == 1) {
+ for (i = 0; i < count; ++i) {
+ if (boxes[i].w < lowest_w) boxes[i].w = lowest_w;
+ if (boxes[i].h < lowest_h) boxes[i].h = lowest_h;
+ }
+ }
randomize_boxes(boxes, count);
correct_boxes(boxes, count, dx, dy, sx, sy, flip);
if(count > num_boxes) count = num_boxes;
float x,y,w,h;
int id;
- int i;
for (i = 0; i < count; ++i) {
x = boxes[i].x;
@@ -309,7 +330,10 @@
h = boxes[i].h;
id = boxes[i].id;
- if (w < .01 || h < .01) continue;
+ // not detect small objects
+ //if ((w < 0.001F || h < 0.001F)) continue;
+ // if truth (box for object) is smaller than 1x1 pix
+ if ((w < lowest_w || h < lowest_h)) continue;
truth[i*5+0] = x;
truth[i*5+1] = y;
@@ -388,12 +412,47 @@
if(count != 1) printf("Too many or too few labels: %d, %s\n", count, path);
}
-matrix load_labels_paths(char **paths, int n, char **labels, int k)
+void fill_hierarchy(float *truth, int k, tree *hierarchy)
+{
+ int j;
+ for(j = 0; j < k; ++j){
+ if(truth[j]){
+ int parent = hierarchy->parent[j];
+ while(parent >= 0){
+ truth[parent] = 1;
+ parent = hierarchy->parent[parent];
+ }
+ }
+ }
+ int i;
+ int count = 0;
+ for(j = 0; j < hierarchy->groups; ++j){
+ //printf("%d\n", count);
+ int mask = 1;
+ for(i = 0; i < hierarchy->group_size[j]; ++i){
+ if(truth[count + i]){
+ mask = 0;
+ break;
+ }
+ }
+ if (mask) {
+ for(i = 0; i < hierarchy->group_size[j]; ++i){
+ truth[count + i] = SECRET_NUM;
+ }
+ }
+ count += hierarchy->group_size[j];
+ }
+}
+
+matrix load_labels_paths(char **paths, int n, char **labels, int k, tree *hierarchy)
{
matrix y = make_matrix(n, k);
int i;
for(i = 0; i < n && labels; ++i){
fill_truth(paths[i], labels, k, y.vals[i]);
+ if(hierarchy){
+ fill_hierarchy(y.vals[i], k, hierarchy);
+ }
}
return y;
}
@@ -479,7 +538,7 @@
float sx = (float)swidth / ow;
float sy = (float)sheight / oh;
- int flip = rand()%2;
+ int flip = random_gen()%2;
image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
float dx = ((float)pleft/ow)/sx;
@@ -540,7 +599,7 @@
while(fscanf(fp2, "%d %f", &id, &iou) == 2){
if (d.y.vals[i][2*id + 1] < iou) d.y.vals[i][2*id + 1] = iou;
}
-
+
for (j = 0; j < classes; ++j){
if (d.y.vals[i][2*j] > .5 && d.y.vals[i][2*j+1] < .5){
d.y.vals[i][2*j] = 1;
@@ -565,9 +624,9 @@
data load_data_swag(char **paths, int n, int classes, float jitter)
{
- int index = rand()%n;
+ int index = random_gen()%n;
char *random_path = paths[index];
-
+
image orig = load_image_color(random_path, 0, 0);
int h = orig.h;
int w = orig.w;
@@ -598,7 +657,7 @@
float sx = (float)swidth / w;
float sy = (float)sheight / h;
- int flip = rand()%2;
+ int flip = random_gen()%2;
image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
float dx = ((float)pleft/w)/sx;
@@ -616,7 +675,18 @@
return d;
}
-data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure)
+#ifdef OPENCV
+#include "opencv2/highgui/highgui_c.h"
+#include "opencv2/imgproc/imgproc_c.h"
+#include "opencv2/core/version.hpp"
+#ifndef CV_VERSION_EPOCH
+#include "opencv2/videoio/videoio_c.h"
+#include "opencv2/imgcodecs/imgcodecs_c.h"
+#endif
+
+#include "http_stream.h"
+
+data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object)
{
char **random_paths = get_random_paths(paths, n, m);
int i;
@@ -629,18 +699,30 @@
d.y = make_matrix(n, 5*boxes);
for(i = 0; i < n; ++i){
- image orig = load_image_color(random_paths[i], 0, 0);
+ const char *filename = random_paths[i];
- int oh = orig.h;
- int ow = orig.w;
+ int flag = 1;
+ IplImage *src;
+ if ((src = cvLoadImage(filename, flag)) == 0)
+ {
+ fprintf(stderr, "Cannot load image \"%s\"\n", filename);
+ char buff[256];
+ sprintf(buff, "echo %s >> bad.list", filename);
+ system(buff);
+ continue;
+ //exit(0);
+ }
+
+ int oh = src->height;
+ int ow = src->width;
int dw = (ow*jitter);
int dh = (oh*jitter);
- int pleft = rand_uniform(-dw, dw);
- int pright = rand_uniform(-dw, dw);
- int ptop = rand_uniform(-dh, dh);
- int pbot = rand_uniform(-dh, dh);
+ int pleft = rand_uniform_strong(-dw, dw);
+ int pright = rand_uniform_strong(-dw, dw);
+ int ptop = rand_uniform_strong(-dh, dh);
+ int pbot = rand_uniform_strong(-dh, dh);
int swidth = ow - pleft - pright;
int sheight = oh - ptop - pbot;
@@ -648,30 +730,86 @@
float sx = (float)swidth / ow;
float sy = (float)sheight / oh;
- int flip = rand()%2;
- image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
+ int flip = use_flip ? random_gen()%2 : 0;
float dx = ((float)pleft/ow)/sx;
float dy = ((float)ptop /oh)/sy;
- image sized = resize_image(cropped, w, h);
- if(flip) flip_image(sized);
- random_distort_image(sized, hue, saturation, exposure);
- d.X.vals[i] = sized.data;
+ float dhue = rand_uniform_strong(-hue, hue);
+ float dsat = rand_scale(saturation);
+ float dexp = rand_scale(exposure);
- fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);
+ image ai = image_data_augmentation(src, w, h, pleft, ptop, swidth, sheight, flip, jitter, dhue, dsat, dexp);
+ d.X.vals[i] = ai.data;
+
+ //show_image(ai, "aug");
+ //cvWaitKey(0);
- free_image(orig);
- free_image(cropped);
+ fill_truth_detection(filename, boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy, small_object, w, h);
+
+ cvReleaseImage(&src);
}
free(random_paths);
return d;
}
+#else // OPENCV
+data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, int use_flip, float jitter, float hue, float saturation, float exposure, int small_object)
+{
+ char **random_paths = get_random_paths(paths, n, m);
+ int i;
+ data d = { 0 };
+ d.shallow = 0;
+ d.X.rows = n;
+ d.X.vals = calloc(d.X.rows, sizeof(float*));
+ d.X.cols = h*w * 3;
+
+ d.y = make_matrix(n, 5 * boxes);
+ for (i = 0; i < n; ++i) {
+ image orig = load_image_color(random_paths[i], 0, 0);
+
+ int oh = orig.h;
+ int ow = orig.w;
+
+ int dw = (ow*jitter);
+ int dh = (oh*jitter);
+
+ int pleft = rand_uniform_strong(-dw, dw);
+ int pright = rand_uniform_strong(-dw, dw);
+ int ptop = rand_uniform_strong(-dh, dh);
+ int pbot = rand_uniform_strong(-dh, dh);
+
+ int swidth = ow - pleft - pright;
+ int sheight = oh - ptop - pbot;
+
+ float sx = (float)swidth / ow;
+ float sy = (float)sheight / oh;
+
+ int flip = use_flip ? random_gen() % 2 : 0;
+ image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
+
+ float dx = ((float)pleft / ow) / sx;
+ float dy = ((float)ptop / oh) / sy;
+
+ image sized = resize_image(cropped, w, h);
+ if (flip) flip_image(sized);
+ random_distort_image(sized, hue, saturation, exposure);
+ d.X.vals[i] = sized.data;
+
+ fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1. / sx, 1. / sy, small_object, w, h);
+
+ free_image(orig);
+ free_image(cropped);
+ }
+ free(random_paths);
+ return d;
+}
+#endif // OPENCV
void *load_thread(void *ptr)
{
- //printf("Loading data: %d\n", rand());
+ //srand(time(0));
+ //printf("Loading data: %d\n", random_gen());
load_args a = *(struct load_args*)ptr;
if(a.exposure == 0) a.exposure = 1;
if(a.saturation == 0) a.saturation = 1;
@@ -680,7 +818,7 @@
if (a.type == OLD_CLASSIFICATION_DATA){
*a.d = load_data_old(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
} else if (a.type == CLASSIFICATION_DATA){
- *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
+ *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.hierarchy, a.flip, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
} else if (a.type == SUPER_DATA){
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
} else if (a.type == WRITING_DATA){
@@ -688,7 +826,7 @@
} else if (a.type == REGION_DATA){
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
} else if (a.type == DETECTION_DATA){
- *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
+ *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.flip, a.jitter, a.hue, a.saturation, a.exposure, a.small_object);
} else if (a.type == SWAG_DATA){
*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
} else if (a.type == COMPARE_DATA){
@@ -696,8 +834,11 @@
} else if (a.type == IMAGE_DATA){
*(a.im) = load_image_color(a.path, 0, 0);
*(a.resized) = resize_image(*(a.im), a.w, a.h);
+ }else if (a.type == LETTERBOX_DATA) {
+ *(a.im) = load_image_color(a.path, 0, 0);
+ *(a.resized) = letterbox_image(*(a.im), a.w, a.h);
} else if (a.type == TAG_DATA){
- *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
+ *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.flip, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure);
}
free(ptr);
return 0;
@@ -714,6 +855,7 @@
void *load_threads(void *ptr)
{
+ //srand(time(0));
int i;
load_args args = *(load_args *)ptr;
if (args.threads == 0) args.threads = 1;
@@ -771,24 +913,24 @@
data d = {0};
d.shallow = 0;
d.X = load_image_paths(paths, n, w, h);
- d.y = load_labels_paths(paths, n, labels, k);
+ d.y = load_labels_paths(paths, n, labels, k, 0);
if(m) free(paths);
return d;
}
/*
-data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
-{
- data d = {0};
- d.indexes = calloc(n, sizeof(int));
- if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
- d.shallow = 0;
- d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
- d.y = load_labels_paths(paths, n, labels, k);
- if(m) free(paths);
- return d;
-}
-*/
+ data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
+ {
+ data d = {0};
+ d.indexes = calloc(n, sizeof(int));
+ if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
+ d.shallow = 0;
+ d.X = load_image_augment_paths(paths, n, flip, min, max, size, angle, aspect, hue, saturation, exposure);
+ d.y = load_labels_paths(paths, n, labels, k);
+ if(m) free(paths);
+ return d;
+ }
+ */
data load_data_super(char **paths, int n, int m, int w, int h, int scale)
{
@@ -808,7 +950,7 @@
for(i = 0; i < n; ++i){
image im = load_image_color(paths[i], 0, 0);
image crop = random_crop_image(im, w*scale, h*scale);
- int flip = rand()%2;
+ int flip = random_gen()%2;
if (flip) flip_image(crop);
image resize = resize_image(crop, w, h);
d.X.vals[i] = resize.data;
@@ -820,25 +962,25 @@
return d;
}
-data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
+data load_data_augment(char **paths, int n, int m, char **labels, int k, tree *hierarchy, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.shallow = 0;
- d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
- d.y = load_labels_paths(paths, n, labels, k);
+ d.X = load_image_augment_paths(paths, n, use_flip, min, max, size, angle, aspect, hue, saturation, exposure);
+ d.y = load_labels_paths(paths, n, labels, k, hierarchy);
if(m) free(paths);
return d;
}
-data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
+data load_data_tag(char **paths, int n, int m, int k, int use_flip, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure)
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.w = size;
d.h = size;
d.shallow = 0;
- d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure);
+ d.X = load_image_augment_paths(paths, n, use_flip, min, max, size, angle, aspect, hue, saturation, exposure);
d.y = load_tags_paths(paths, n, k);
if(m) free(paths);
return d;
@@ -913,8 +1055,8 @@
for(i = 0; i < 10000; ++i){
unsigned char bytes[3073];
fread(bytes, 1, 3073, fp);
- int class = bytes[0];
- y.vals[i][class] = 1;
+ int class_id = bytes[0];
+ y.vals[i][class_id] = 1;
for(j = 0; j < X.cols; ++j){
X.vals[i][j] = (double)bytes[j+1];
}
@@ -930,7 +1072,7 @@
{
int j;
for(j = 0; j < n; ++j){
- int index = rand()%d.X.rows;
+ int index = random_gen()%d.X.rows;
memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float));
memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float));
}
@@ -977,8 +1119,8 @@
for(i = 0; i < 10000; ++i){
unsigned char bytes[3073];
fread(bytes, 1, 3073, fp);
- int class = bytes[0];
- y.vals[i+b*10000][class] = 1;
+ int class_id = bytes[0];
+ y.vals[i+b*10000][class_id] = 1;
for(j = 0; j < X.cols; ++j){
X.vals[i+b*10000][j] = (double)bytes[j+1];
}
@@ -1043,7 +1185,7 @@
{
int i;
for(i = d.X.rows-1; i > 0; --i){
- int index = rand()%i;
+ int index = random_gen()%i;
float *swap = d.X.vals[index];
d.X.vals[index] = d.X.vals[i];
d.X.vals[i] = swap;
@@ -1107,7 +1249,7 @@
int i;
for(i = 0; i < num; ++i){
- int index = rand()%d.X.rows;
+ int index = random_gen()%d.X.rows;
r.X.vals[i] = d.X.vals[index];
r.y.vals[i] = d.y.vals[index];
}
--
Gitblit v1.10.0