From f996bd59a61338d8d51e2b19482d684f6dd04d0f Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Wed, 23 Sep 2015 21:16:48 +0000
Subject: [PATCH] more writing fixes
---
src/network.c | 4 +
src/yolo.c | 6 +
src/swag.c | 6 +
src/cost_layer.c | 13 +++
cfg/writing.cfg | 26 ++----
src/data.c | 6
src/dice.c | 6 +
src/data.h | 5
src/coco.c | 6 +
src/writing.c | 110 ++++++++++++++------------
src/imagenet.c | 6 +
src/captcha.c | 6 +
src/cost_layer.h | 1
13 files changed, 115 insertions(+), 86 deletions(-)
diff --git a/cfg/writing.cfg b/cfg/writing.cfg
index ebee3ed..1ed899b 100644
--- a/cfg/writing.cfg
+++ b/cfg/writing.cfg
@@ -4,39 +4,31 @@
height=256
width=256
channels=3
-learning_rate=0.000001
+learning_rate=0.00000001
momentum=0.9
decay=0.0005
seen=0
-[crop]
-crop_height=256
-crop_width=256
-flip=0
-angle=0
-saturation=1
-exposure=1
+[convolutional]
+filters=32
+size=3
+stride=1
+pad=1
+activation=leaky
[convolutional]
filters=32
size=3
stride=1
pad=1
-activation=ramp
+activation=leaky
[convolutional]
filters=32
size=3
stride=1
pad=1
-activation=ramp
-
-[convolutional]
-filters=32
-size=3
-stride=1
-pad=1
-activation=ramp
+activation=leaky
[convolutional]
filters=1
diff --git a/src/captcha.c b/src/captcha.c
index 4e77ce2..79b4e4e 100644
--- a/src/captcha.c
+++ b/src/captcha.c
@@ -106,7 +106,8 @@
srand(2222222);
int i = 0;
char **names = get_labels("/data/captcha/reimgs.labels.list");
- char input[256];
+ char buff[256];
+ char *input = buff;
int indexes[26];
while(1){
if(filename){
@@ -114,7 +115,8 @@
}else{
//printf("Enter Image Path: ");
//fflush(stdout);
- fgets(input, 256, stdin);
+ input = fgets(input, 256, stdin);
+ if(!input) return;
strtok(input, "\n");
}
image im = load_image_color(input, net.w, net.h);
diff --git a/src/coco.c b/src/coco.c
index 234f342..c016548 100644
--- a/src/coco.c
+++ b/src/coco.c
@@ -495,14 +495,16 @@
set_batch_network(&net, 1);
srand(2222222);
clock_t time;
- char input[256];
+ char buff[256];
+ char *input = buff;
while(1){
if(filename){
strncpy(input, filename, 256);
} else {
printf("Enter Image Path: ");
fflush(stdout);
- fgets(input, 256, stdin);
+ input = fgets(input, 256, stdin);
+ if(!input) return;
strtok(input, "\n");
}
image im = load_image_color(input,0,0);
diff --git a/src/cost_layer.c b/src/cost_layer.c
index 4ec0ac4..7593490 100644
--- a/src/cost_layer.c
+++ b/src/cost_layer.c
@@ -45,6 +45,35 @@
return l;
}
+/*
+ * Resize a cost layer to take `inputs` values per batch item.
+ *
+ * l      - layer to resize; updated in place.
+ * inputs - new per-item input count; also stored as the output count.
+ *
+ * The host delta buffer is regrown to inputs * l->batch floats. In GPU
+ * builds the old device buffer is freed and a replacement is obtained from
+ * cuda_make_array, which is handed the resized host buffer.
+ *
+ * The process is aborted if the host buffer cannot be reallocated: the
+ * layer would otherwise carry a NULL delta alongside non-zero sizes.
+ */
+void resize_cost_layer(cost_layer *l, int inputs)
+{
+    size_t count = (size_t)inputs * (size_t)l->batch;
+    /* Keep the old pointer until realloc succeeds so it is never lost. */
+    float *delta = realloc(l->delta, count * sizeof(float));
+    if (!delta && count != 0) abort();
+
+    l->inputs = inputs;
+    l->outputs = inputs;
+    l->delta = delta;
+#ifdef GPU
+    cuda_free(l->delta_gpu);
+    l->delta_gpu = cuda_make_array(l->delta, inputs*l->batch);
+#endif
+}
+
void forward_cost_layer(cost_layer l, network_state state)
{
if (!state.truth) return;
@@ -83,7 +94,7 @@
if (l.cost_type == MASKED) {
mask_ongpu(l.batch*l.inputs, state.input, SECRET_NUM, state.truth);
}
-
+
copy_ongpu(l.batch*l.inputs, state.truth, 1, l.delta_gpu, 1);
axpy_ongpu(l.batch*l.inputs, -1, state.input, 1, l.delta_gpu, 1);
diff --git a/src/cost_layer.h b/src/cost_layer.h
index 9ad3124..aa4af2f 100644
--- a/src/cost_layer.h
+++ b/src/cost_layer.h
@@ -10,6 +10,7 @@
cost_layer make_cost_layer(int batch, int inputs, COST_TYPE type, float scale);
void forward_cost_layer(const cost_layer l, network_state state);
void backward_cost_layer(const cost_layer l, network_state state);
+void resize_cost_layer(cost_layer *l, int inputs);
#ifdef GPU
void forward_cost_layer_gpu(cost_layer l, network_state state);
diff --git a/src/data.c b/src/data.c
index 7574a44..2853d72 100644
--- a/src/data.c
+++ b/src/data.c
@@ -554,7 +554,7 @@
} else if (a.type == DETECTION_DATA){
*a.d = load_data_detection(a.n, a.paths, a.m, a.classes, a.w, a.h, a.num_boxes, a.background);
} else if (a.type == WRITING_DATA){
- *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.downsample);
+ *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
} else if (a.type == REGION_DATA){
*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes);
} else if (a.type == COMPARE_DATA){
@@ -578,14 +578,14 @@
return thread;
}
-data load_data_writing(char **paths, int n, int m, int w, int h, int downsample)
+data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h)
{
if(m) paths = get_random_paths(paths, n, m);
char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png");
data d;
d.shallow = 0;
d.X = load_image_paths(paths, n, w, h);
- d.y = load_image_paths_gray(replace_paths, n, w/downsample, h/downsample);
+ d.y = load_image_paths_gray(replace_paths, n, out_w, out_h);
if(m) free(paths);
int i;
for(i = 0; i < n; ++i) free(replace_paths[i]);
diff --git a/src/data.h b/src/data.h
index c7809b5..b91819f 100644
--- a/src/data.h
+++ b/src/data.h
@@ -37,7 +37,8 @@
char **labels;
int h;
int w;
- int downsample;
+ int out_w;
+ int out_h;
int nh;
int nw;
int num_boxes;
@@ -69,7 +70,7 @@
data load_cifar10_data(char *filename);
data load_all_cifar10();
-data load_data_writing(char **paths, int n, int m, int w, int h, int downsample);
+data load_data_writing(char **paths, int n, int m, int w, int h, int out_w, int out_h);
list *get_paths(char *filename);
char **get_labels(char *filename);
diff --git a/src/dice.c b/src/dice.c
index 7948741..fdc535e 100644
--- a/src/dice.c
+++ b/src/dice.c
@@ -76,7 +76,8 @@
srand(2222222);
int i = 0;
char **names = dice_labels;
- char input[256];
+ char buff[256];
+ char *input = buff;
int indexes[6];
while(1){
if(filename){
@@ -84,7 +85,8 @@
}else{
printf("Enter Image Path: ");
fflush(stdout);
- fgets(input, 256, stdin);
+ input = fgets(input, 256, stdin);
+ if(!input) return;
strtok(input, "\n");
}
image im = load_image_color(input, net.w, net.h);
diff --git a/src/imagenet.c b/src/imagenet.c
index c826a0f..567a8c4 100644
--- a/src/imagenet.c
+++ b/src/imagenet.c
@@ -152,15 +152,17 @@
int i = 0;
char **names = get_labels("data/shortnames.txt");
clock_t time;
- char input[256];
int indexes[10];
+ char buff[256];
+ char *input = buff;
while(1){
if(filename){
strncpy(input, filename, 256);
}else{
printf("Enter Image Path: ");
fflush(stdout);
- fgets(input, 256, stdin);
+ input = fgets(input, 256, stdin);
+ if(!input) return;
strtok(input, "\n");
}
image im = load_image_color(input, 256, 256);
diff --git a/src/network.c b/src/network.c
index 80ee291..7f19318 100644
--- a/src/network.c
+++ b/src/network.c
@@ -330,6 +330,7 @@
//if(w == net->w && h == net->h) return 0;
net->w = w;
net->h = h;
+ int inputs = 0;
//fprintf(stderr, "Resizing to %d x %d...", w, h);
//fflush(stderr);
for (i = 0; i < net->n; ++i){
@@ -343,9 +344,12 @@
break;
}else if(l.type == NORMALIZATION){
resize_normalization_layer(&l, w, h);
+ }else if(l.type == COST){
+ resize_cost_layer(&l, inputs);
}else{
error("Cannot resize this type of layer");
}
+ inputs = l.outputs;
net->layers[i] = l;
w = l.out_w;
h = l.out_h;
diff --git a/src/swag.c b/src/swag.c
index 37dde36..7058df5 100644
--- a/src/swag.c
+++ b/src/swag.c
@@ -274,14 +274,16 @@
set_batch_network(&net, 1);
srand(2222222);
clock_t time;
- char input[256];
+ char buff[256];
+ char *input = buff;
while(1){
if(filename){
strncpy(input, filename, 256);
} else {
printf("Enter Image Path: ");
fflush(stdout);
- fgets(input, 256, stdin);
+ input = fgets(input, 256, stdin);
+ if(!input) return;
strtok(input, "\n");
}
image im = load_image_color(input,0,0);
diff --git a/src/writing.c b/src/writing.c
index 71dd53b..32e4f6d 100644
--- a/src/writing.c
+++ b/src/writing.c
@@ -25,16 +25,18 @@
clock_t time;
int N = plist->size;
printf("N: %d\n", N);
+ image out = get_network_image(net);
data train, buffer;
load_args args = {0};
args.w = net.w;
args.h = net.h;
+ args.out_w = out.w;
+ args.out_h = out.h;
args.paths = paths;
args.n = imgs;
args.m = N;
- args.downsample = 1;
args.d = &buffer;
args.type = WRITING_DATA;
@@ -51,9 +53,9 @@
float loss = train_network(net, train);
/*
- image pred = float_to_image(64, 64, 1, out);
- print_image(pred);
- */
+ image pred = float_to_image(64, 64, 1, out);
+ print_image(pred);
+ */
/*
image im = float_to_image(256, 256, 3, train.X.vals[0]);
@@ -69,22 +71,22 @@
if(avg_loss == -1) avg_loss = loss;
avg_loss = avg_loss*.9 + loss*.1;
printf("%d, %.3f: %f, %f avg, %f rate, %lf seconds, %d images\n", get_current_batch(net), (float)(*net.seen)/N, loss, avg_loss, get_current_rate(net), sec(clock()-time), *net.seen);
- free_data(train);
- if(get_current_batch(net)%100 == 0){
- char buff[256];
- sprintf(buff, "%s/%s_batch_%d.weights", backup_directory, base, get_current_batch(net));
- save_weights(net, buff);
- }
- if(*net.seen/N > epoch){
- epoch = *net.seen/N;
- char buff[256];
- sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
- save_weights(net, buff);
- }
+ free_data(train);
+ if(get_current_batch(net)%100 == 0){
+ char buff[256];
+ sprintf(buff, "%s/%s_batch_%d.weights", backup_directory, base, get_current_batch(net));
+ save_weights(net, buff);
+ }
+ if(*net.seen/N > epoch){
+ epoch = *net.seen/N;
+ char buff[256];
+ sprintf(buff, "%s/%s_%d.weights",backup_directory,base, epoch);
+ save_weights(net, buff);
+ }
}
}
-void test_writing(char *cfgfile, char *weightfile, char *outfile)
+void test_writing(char *cfgfile, char *weightfile, char *filename)
{
network net = parse_network_cfg(cfgfile);
if(weightfile){
@@ -93,51 +95,57 @@
set_batch_network(&net, 1);
srand(2222222);
clock_t time;
- char filename[256];
+ char buff[256];
+ char *input = buff;
+ while(1){
+ if(filename){
+ strncpy(input, filename, 256);
+ }else{
+ printf("Enter Image Path: ");
+ fflush(stdout);
+ input = fgets(input, 256, stdin);
+ if(!input) return;
+ strtok(input, "\n");
+ }
- fgets(filename, 256, stdin);
- strtok(filename, "\n");
- image im = load_image_color(filename, 0, 0);
- //image im = load_image_color("/home/pjreddie/darknet/data/figs/C02-1001-Figure-1.png", 0, 0);
- image sized = resize_image(im, net.w, net.h);
- printf("%d %d %d\n", im.h, im.w, im.c);
- float *X = sized.data;
- time=clock();
- network_predict(net, X);
- printf("%s: Predicted in %f seconds.\n", filename, sec(clock()-time));
- image pred = get_network_image(net);
+ image im = load_image_color(input, 0, 0);
+ resize_network(&net, im.w, im.h);
+ printf("%d %d %d\n", im.h, im.w, im.c);
+ float *X = im.data;
+ time=clock();
+ network_predict(net, X);
+ printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
+ image pred = get_network_image(net);
- image t = threshold_image(pred, .5);
- free_image(pred);
- pred = t;
+ image upsampled = resize_image(pred, im.w, im.h);
+ image thresh = threshold_image(upsampled, .5);
+ pred = thresh;
- if (outfile) {
- printf("Save image as %s.png (shape: %d %d)\n", outfile, pred.w, pred.h);
- save_image(pred, outfile);
- } else {
- show_image(sized, "orig");
show_image(pred, "prediction");
+ show_image(im, "orig");
#ifdef OPENCV
- cvWaitKey(0);
- cvDestroyAllWindows();
+ cvWaitKey(0);
+ cvDestroyAllWindows();
#endif
- }
- free_image(im);
- free_image(sized);
+ free_image(upsampled);
+ free_image(thresh);
+ free_image(im);
+ if (filename) break;
+ }
}
void run_writing(int argc, char **argv)
{
- if(argc < 4){
- fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
- return;
- }
+ if(argc < 4){
+ fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
+ return;
+ }
- char *cfg = argv[3];
- char *weights = (argc > 4) ? argv[4] : 0;
- char *outfile = (argc > 5) ? argv[5] : 0;
- if(0==strcmp(argv[2], "train")) train_writing(cfg, weights);
- else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, outfile);
+ char *cfg = argv[3];
+ char *weights = (argc > 4) ? argv[4] : 0;
+ char *filename = (argc > 5) ? argv[5] : 0;
+ if(0==strcmp(argv[2], "train")) train_writing(cfg, weights);
+ else if(0==strcmp(argv[2], "test")) test_writing(cfg, weights, filename);
}
diff --git a/src/yolo.c b/src/yolo.c
index 9b229e2..b2c89d8 100644
--- a/src/yolo.c
+++ b/src/yolo.c
@@ -290,14 +290,16 @@
set_batch_network(&net, 1);
srand(2222222);
clock_t time;
- char input[256];
+ char buff[256];
+ char *input = buff;
while(1){
if(filename){
strncpy(input, filename, 256);
} else {
printf("Enter Image Path: ");
fflush(stdout);
- fgets(input, 256, stdin);
+ input = fgets(input, 256, stdin);
+ if(!input) return;
strtok(input, "\n");
}
image im = load_image_color(input,0,0);
--
Gitblit v1.10.0