From aebe937710ced03d03f73ab23f410f29685655c1 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Thu, 11 Aug 2016 18:54:24 +0000
Subject: [PATCH] Add rotation/exposure/saturation augmentation to classifier data loading; update yolo.cfg
---
src/image.c | 69 +++++++++++------
cfg/yolo.cfg | 53 +++++++++++-
src/utils.h | 1
src/network.h | 3
src/connected_layer.c | 2
src/parser.c | 4 +
src/data.c | 30 ++++--
src/classifier.c | 21 +++--
src/go.c | 2
src/data.h | 9 +
src/image.h | 2
src/utils.c | 6 +
12 files changed, 144 insertions(+), 58 deletions(-)
diff --git a/cfg/yolo.cfg b/cfg/yolo.cfg
index 1f69856..6876eff 100644
--- a/cfg/yolo.cfg
+++ b/cfg/yolo.cfg
@@ -1,19 +1,29 @@
[net]
-batch=1
-subdivisions=1
+batch=64
+subdivisions=2
height=448
width=448
channels=3
momentum=0.9
decay=0.0005
-learning_rate=0.001
+learning_rate=0.0005
policy=steps
steps=200,400,600,20000,30000
scales=2.5,2,2,.1,.1
max_batches = 40000
+[crop]
+crop_width=448
+crop_height=448
+flip=0
+angle=0
+saturation = 1.5
+exposure = 1.5
+noadjust=1
+
[convolutional]
+batch_normalize=1
filters=64
size=7
stride=2
@@ -25,6 +35,7 @@
stride=2
[convolutional]
+batch_normalize=1
filters=192
size=3
stride=1
@@ -36,6 +47,7 @@
stride=2
[convolutional]
+batch_normalize=1
filters=128
size=1
stride=1
@@ -43,6 +55,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=256
size=3
stride=1
@@ -50,6 +63,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=256
size=1
stride=1
@@ -57,6 +71,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=512
size=3
stride=1
@@ -68,6 +83,7 @@
stride=2
[convolutional]
+batch_normalize=1
filters=256
size=1
stride=1
@@ -75,6 +91,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=512
size=3
stride=1
@@ -82,6 +99,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=256
size=1
stride=1
@@ -89,6 +107,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=512
size=3
stride=1
@@ -96,6 +115,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=256
size=1
stride=1
@@ -103,6 +123,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=512
size=3
stride=1
@@ -110,6 +131,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=256
size=1
stride=1
@@ -117,6 +139,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=512
size=3
stride=1
@@ -124,6 +147,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=512
size=1
stride=1
@@ -131,6 +155,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=1024
size=3
stride=1
@@ -142,6 +167,7 @@
stride=2
[convolutional]
+batch_normalize=1
filters=512
size=1
stride=1
@@ -149,6 +175,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=1024
size=3
stride=1
@@ -156,6 +183,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=512
size=1
stride=1
@@ -163,6 +191,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
filters=1024
size=3
stride=1
@@ -172,6 +201,7 @@
#######
[convolutional]
+batch_normalize=1
size=3
stride=1
pad=1
@@ -179,6 +209,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
size=3
stride=2
pad=1
@@ -186,6 +217,7 @@
activation=leaky
[convolutional]
+batch_normalize=1
size=3
stride=1
pad=1
@@ -193,18 +225,25 @@
activation=leaky
[convolutional]
+batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
-[connected]
-output=4096
+[local]
+size=3
+stride=1
+pad=1
+filters=256
activation=leaky
+[dropout]
+probability=.5
+
[connected]
-output= 1470
+output= 1715
activation=linear
[detection]
@@ -212,7 +251,7 @@
coords=4
rescore=1
side=7
-num=2
+num=3
softmax=0
sqrt=1
jitter=.2
diff --git a/src/classifier.c b/src/classifier.c
index 608e3ab..ee6d212 100644
--- a/src/classifier.c
+++ b/src/classifier.c
@@ -41,7 +41,7 @@
void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
{
- int nthreads = 2;
+ int nthreads = 8;
int i;
data_seed = time(0);
@@ -82,6 +82,9 @@
args.min = net.min_crop;
args.max = net.max_crop;
+ args.angle = net.angle;
+ args.exposure = net.exposure;
+ args.saturation = net.saturation;
args.size = net.w;
args.paths = paths;
@@ -113,14 +116,14 @@
printf("Loaded: %lf seconds\n", sec(clock()-time));
time=clock();
- /*
- int u;
- for(u = 0; u < net.batch; ++u){
- image im = float_to_image(net.w, net.h, 3, train.X.vals[u]);
- show_image(im, "loaded");
- cvWaitKey(0);
- }
- */
+ if(0){
+ int u;
+ for(u = 0; u < imgs; ++u){
+ image im = float_to_image(net.w, net.h, 3, train.X.vals[u]);
+ show_image(im, "loaded");
+ cvWaitKey(0);
+ }
+ }
float loss = train_network(net, train);
if(avg_loss == -1) avg_loss = loss;
diff --git a/src/connected_layer.c b/src/connected_layer.c
index e7784ea..623e6c8 100644
--- a/src/connected_layer.c
+++ b/src/connected_layer.c
@@ -265,7 +265,7 @@
void backward_connected_layer_gpu(connected_layer l, network_state state)
{
int i;
- constrain_ongpu(l.outputs*l.batch, 5, l.delta_gpu, 1);
+ constrain_ongpu(l.outputs*l.batch, 1, l.delta_gpu, 1);
gradient_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation, l.delta_gpu);
for(i = 0; i < l.batch; ++i){
axpy_ongpu(l.outputs, 1, l.delta_gpu + i*l.outputs, 1, l.bias_updates_gpu, 1);
diff --git a/src/data.c b/src/data.c
index 231fb93..9aa08af 100644
--- a/src/data.c
+++ b/src/data.c
@@ -100,7 +100,7 @@
return X;
}
-matrix load_image_cropped_paths(char **paths, int n, int min, int max, int size)
+matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float exposure, float saturation)
{
int i;
matrix X;
@@ -110,9 +110,14 @@
for(i = 0; i < n; ++i){
image im = load_image_color(paths[i], 0, 0);
- image crop = random_resize_crop_image(im, min, max, size);
+ image crop = random_augment_image(im, angle, min, max, size);
int flip = rand_r(&data_seed)%2;
if (flip) flip_image(crop);
+ float exp = rand_uniform(1./exposure, exposure);
+ float sat = rand_uniform(1./saturation, saturation);
+ exposure_image(crop, exp);
+ exposure_image(crop, sat);
+
/*
show_image(im, "orig");
show_image(crop, "crop");
@@ -668,14 +673,17 @@
//printf("Loading data: %d\n", rand_r(&data_seed));
load_args a = *(struct load_args*)ptr;
+ if(a.exposure == 0) a.exposure = 1;
+ if(a.saturation == 0) a.saturation = 1;
+
if (a.type == OLD_CLASSIFICATION_DATA){
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
} else if (a.type == CLASSIFICATION_DATA){
- *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size);
+ *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
} else if (a.type == SUPER_DATA){
*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
} else if (a.type == STUDY_DATA){
- *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size);
+ *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
} else if (a.type == WRITING_DATA){
*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
} else if (a.type == REGION_DATA){
@@ -690,7 +698,7 @@
*(a.im) = load_image_color(a.path, 0, 0);
*(a.resized) = resize_image(*(a.im), a.w, a.h);
} else if (a.type == TAG_DATA){
- *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size);
+ *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
//*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
}
free(ptr);
@@ -732,13 +740,13 @@
return d;
}
-data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size)
+data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
{
data d = {0};
d.indexes = calloc(n, sizeof(int));
if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
d.shallow = 0;
- d.X = load_image_cropped_paths(paths, n, min, max, size);
+ d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
d.y = load_labels_paths(paths, n, labels, k);
if(m) free(paths);
return d;
@@ -774,25 +782,25 @@
return d;
}
-data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size)
+data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation) /* angle, exposure and saturation are forwarded unchanged to load_image_augment_paths */
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.shallow = 0;
- d.X = load_image_cropped_paths(paths, n, min, max, size);
+ d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation); /* replaces the crop-only loader; adds the rotation and exposure/saturation parameters */
d.y = load_labels_paths(paths, n, labels, k);
if(m) free(paths);
return d;
}
-data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size)
+data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation)
{
if(m) paths = get_random_paths(paths, n, m);
data d = {0};
d.w = size;
d.h = size;
d.shallow = 0;
- d.X = load_image_cropped_paths(paths, n, min, max, size);
+ d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
d.y = load_tags_paths(paths, n, k);
if(m) free(paths);
return d;
diff --git a/src/data.h b/src/data.h
index 75123a5..1220be0 100644
--- a/src/data.h
+++ b/src/data.h
@@ -51,6 +51,9 @@
int background;
int scale;
float jitter;
+ float angle;
+ float saturation;
+ float exposure;
data *d;
image *im;
image *resized;
@@ -72,10 +75,10 @@
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter);
-data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size);
-data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size);
+data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation);
+data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
-data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size);
+data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_go(char *filename);
box_label *read_boxes(char *filename, int *n);
diff --git a/src/go.c b/src/go.c
index 91beaf1..bb5e60e 100644
--- a/src/go.c
+++ b/src/go.c
@@ -132,7 +132,7 @@
char buff[256];
float *board = calloc(19*19*net.batch, sizeof(float));
float *move = calloc(19*19*net.batch, sizeof(float));
- moves m = load_go_moves("/home/pjreddie/go.train");
+ moves m = load_go_moves("/home/pjreddie/backup/go.train");
//moves m = load_go_moves("games.txt");
int N = m.n;
diff --git a/src/image.c b/src/image.c
index fd890d0..fe63b34 100644
--- a/src/image.c
+++ b/src/image.c
@@ -459,6 +459,25 @@
return out;
}
+ /* For each output pixel (x, y): rotate ((x+dx)/s, (y+dy)/s) by rad about the centre of im and bilinearly sample im there. */
+ image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
+ {
+     int x, y, c;
+     float cx = im.w/2., cy = im.h/2.; /* centre of the source image */
+     double cs = cos(rad), sn = sin(rad); /* hoisted loop invariants; kept as double so the per-pixel arithmetic is unchanged */
+     image rot = make_image(w, h, im.c); /* w x h output with im.c channels, returned to the caller */
+     for(c = 0; c < im.c; ++c){
+         for(y = 0; y < h; ++y){
+             for(x = 0; x < w; ++x){
+                 float rx = cs*(x/s + dx/s -cx) - sn*(y/s + dy/s -cy) + cx;
+                 float ry = sn*(x/s + dx/s -cx) + cs*(y/s + dy/s -cy) + cy;
+                 set_pixel(rot, x, y, c, bilinear_interpolate(im, rx, ry, c));
+             }
+         }
+     }
+     return rot;
+ }
+
image rotate_image(image im, float rad)
{
int x, y, c;
@@ -603,15 +622,19 @@
return crop;
}
-image random_resize_crop_image(image im, int low, int high, int size)
+image random_augment_image(image im, float angle, int low, int high, int size) /* random scale + rotation + size x size crop of im; returns a new image */
{
int r = rand_int(low, high);
- image resized = resize_min(im, r);
- int dx = rand_int(0, resized.w - size);
- int dy = rand_int(0, resized.h - size);
- image crop = crop_image(resized, dx, dy, size, size);
+ int min = (im.h < im.w) ? im.h : im.w; /* shorter side of the source image */
+ float scale = (float)r / min; /* factor that maps the shorter side to r pixels */
- if(resized.data != im.data) free_image(resized);
+ float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; /* angle is in degrees: draw from [-angle, angle], convert to radians */
+ int dx = rand_int(0, scale * im.w - size); /* crop offset in scaled-image pixels; NOTE(review): rand_int's handling of a negative upper bound is not visible here - confirm */
+ int dy = rand_int(0, scale * im.h - size);
+ //printf("%d %d\n", dx, dy);
+
+ image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy); /* scale, rotate and crop in one resampling pass; no intermediate resized image to free */
+
return crop;
}
@@ -794,23 +817,6 @@
constrain_image(im);
}
-/*
- image saturate_image(image im, float sat)
- {
- image gray = grayscale_image(im);
- image blend = blend_image(im, gray, sat);
- free_image(gray);
- constrain_image(blend);
- return blend;
- }
-
- image brightness_image(image im, float b)
- {
- image bright = make_image(im.w, im.h, im.c);
- return bright;
- }
- */
-
float bilinear_interpolate(image im, float x, float y, int c)
{
int ix = (int) floorf(x);
@@ -893,6 +899,7 @@
image bin = binarize_image(im);
+/*
#ifdef GPU
image r = resize_image(im, im.w, im.h);
image black = make_image(im.w*2 + 3, im.h*2 + 3, 9);
@@ -911,7 +918,16 @@
show_image_layers(black, "Black");
show_image(black2, "Recreate");
#endif
+*/
+ image rot = rotate_crop_image(im, -.2618, 1, im.w/2, im.h/2, 0, 0);
+ image rot3 = rotate_crop_image(im, -.2618, 2, im.w, im.h, im.w/2, 0);
+ image rot2 = rotate_crop_image(im, -.2618, 1, im.w, im.h, 0, 0);
+ show_image(rot, "Rotated");
+ show_image(rot2, "base");
+ show_image(rot3, "Rotated2");
+
+/*
show_image(im, "Original");
show_image(bin, "Binary");
show_image(gray, "Gray");
@@ -919,6 +935,7 @@
show_image(sat5, "Saturation-.5");
show_image(exp2, "Exposure-2");
show_image(exp5, "Exposure-.5");
+ */
#ifdef OPENCV
cvWaitKey(0);
#endif
@@ -1036,7 +1053,11 @@
}
float get_pixel_extend(image m, int x, int y, int c)
{
- if(x < 0 || x >= m.w || y < 0 || y >= m.h || c < 0 || c >= m.c) return 0;
+ if(x < 0) x = 0; /* clamp x and y into the image so out-of-range reads repeat the nearest edge pixel instead of returning 0 */
+ if(x >= m.w) x = m.w-1;
+ if(y < 0) y = 0;
+ if(y >= m.h) y = m.h-1;
+ if(c < 0 || c >= m.c) return 0; /* an out-of-range channel still reads as 0 */
return get_pixel(m, x, y, c);
}
void set_pixel(image m, int x, int y, int c, float val)
diff --git a/src/image.h b/src/image.h
index e4eecd5..9c53fd3 100644
--- a/src/image.h
+++ b/src/image.h
@@ -31,7 +31,7 @@
void scale_image(image m, float s);
image crop_image(image im, int dx, int dy, int w, int h);
image random_crop_image(image im, int w, int h);
-image random_resize_crop_image(image im, int low, int high, int size);
+image random_augment_image(image im, float angle, int low, int high, int size);
image resize_image(image im, int w, int h);
image resize_min(image im, int min);
void translate_image(image m, float s);
diff --git a/src/network.h b/src/network.h
index 4157387..3306962 100644
--- a/src/network.h
+++ b/src/network.h
@@ -40,6 +40,9 @@
int h, w, c;
int max_crop;
int min_crop;
+ float angle;
+ float exposure;
+ float saturation;
int gpu_index;
diff --git a/src/parser.c b/src/parser.c
index 503e7cf..904df1a 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -483,6 +483,10 @@
net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
+ net->angle = option_find_float_quiet(options, "angle", 0);
+ net->saturation = option_find_float_quiet(options, "saturation", 1);
+ net->exposure = option_find_float_quiet(options, "exposure", 1);
+
if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
char *policy_s = option_find_str(options, "policy", "constant");
diff --git a/src/utils.c b/src/utils.c
index 7386305..41893fc 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -531,7 +531,6 @@
}
// From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
-#define TWO_PI 6.2831853071795864769252866
float rand_normal()
{
static int haveSpare = 0;
@@ -578,6 +577,11 @@
float rand_uniform(float min, float max)
{
+ if(max < min){ /* accept bounds in either order, e.g. rand_uniform(1./e, e) with e < 1 */
+ float swap = min;
+ min = max;
+ max = swap;
+ }
return ((float)rand()/RAND_MAX * (max - min)) + min;
}
diff --git a/src/utils.h b/src/utils.h
index cba7f6f..ae0cba2 100644
--- a/src/utils.h
+++ b/src/utils.h
@@ -5,6 +5,7 @@
#include "list.h"
#define SECRET_NUM -1234
+#define TWO_PI 6.2831853071795864769252866
void shuffle(void *arr, size_t n, size_t size);
void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections);
--
Gitblit v1.10.0