~speedprog/mtg/mtg_card_detector.git

parent: b5936b49 | patch | commit | show whitespace

Joseph Redmon

2015-09-09 393dc8eb6f3a9dd92ec665200444186c1addc5d2

stable

9 files modified

1 files added

	Makefile	2 ●●●●● patch \| view \| raw \| blame \| history
	cfg/darknet.cfg	14 ●●●●● patch \| view \| raw \| blame \| history
	cfg/yolo.cfg	7 ●●●●● patch \| view \| raw \| blame \| history
	src/darknet.c	3 ●●●●● patch \| view \| raw \| blame \| history
	src/detection_layer.c	9 ●●●●● patch \| view \| raw \| blame \| history
	src/network.c	13 ●●●●● patch \| view \| raw \| blame \| history
	src/network.h	6 ●●●●● patch \| view \| raw \| blame \| history
	src/parser.c	33 ●●●●● patch \| view \| raw \| blame \| history
	src/yolo.c	18 ●●●●● patch \| view \| raw \| blame \| history
	src/yoloplus.c	334 ●●●●● patch \| view \| raw \| blame \| history

 Makefile

@@ -34,7 +34,7 @@
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
endif

OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o region_layer.o layer.o compare.o
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o region_layer.o layer.o compare.o yoloplus.o
ifeq ($(GPU), 1) 
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
endif

 cfg/darknet.cfg

@@ -27,7 +27,7 @@
activation=leaky

[maxpool]
size=3
size=2
stride=2

[convolutional]
@@ -38,7 +38,7 @@
activation=leaky

[maxpool]
size=3
size=2
stride=2

[convolutional]
@@ -49,7 +49,7 @@
activation=leaky

[maxpool]
size=3
size=2
stride=2

[convolutional]
@@ -60,7 +60,7 @@
activation=leaky

[maxpool]
size=3
size=2
stride=2

[convolutional]
@@ -71,7 +71,7 @@
activation=leaky

[maxpool]
size=3
size=2
stride=2

[convolutional]
@@ -82,7 +82,7 @@
activation=leaky

[maxpool]
size=3
size=2
stride=2

[convolutional]
@@ -99,7 +99,7 @@

[connected]
output=1000
activation=linear
activation=leaky

[softmax]


 cfg/yolo.cfg

@@ -4,10 +4,15 @@
height=448
width=448
channels=3
learning_rate=0.01
learning_rate=0.001
momentum=0.9
decay=0.0005

policy=steps
steps=50, 5000
scales=10, .1
max_batches = 8000

[crop]
crop_width=448
crop_height=448

 src/darknet.c

@@ -13,6 +13,7 @@

extern void run_imagenet(int argc, char **argv);
extern void run_yolo(int argc, char **argv);
extern void run_yoloplus(int argc, char **argv);
extern void run_coco(int argc, char **argv);
extern void run_writing(int argc, char **argv);
extern void run_captcha(int argc, char **argv);
@@ -178,6 +179,8 @@
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "yolo")){
        run_yolo(argc, argv);
    } else if (0 == strcmp(argv[1], "yoloplus")){
        run_yoloplus(argc, argv);
    } else if (0 == strcmp(argv[1], "coco")){
        run_coco(argc, argv);
    } else if (0 == strcmp(argv[1], "compare")){

 src/detection_layer.c

@@ -85,11 +85,12 @@
        int size = get_detection_layer_output_size(l) * l.batch;
        memset(l.delta, 0, size * sizeof(float));
        for (i = 0; i < l.batch*locations; ++i) {
            int classes = l.objectness+l.classes;
            int classes = (l.objectness || l.background)+l.classes;
            int offset = i*(classes+l.coords);
            for (j = offset; j < offset+classes; ++j) {
                *(l.cost) += pow(state.truth[j] - l.output[j], 2);
                l.delta[j] =  state.truth[j] - l.output[j];
                if(l.background && j == offset) l.delta[j] *= .1;
            }

            box truth;
@@ -115,12 +116,18 @@
            l.delta[j+2] = 4 * (state.truth[j+2] - l.output[j+2]);
            l.delta[j+3] = 4 * (state.truth[j+3] - l.output[j+3]);
            if(l.rescore){
                if(l.objectness){
                    state.truth[offset] = iou;
                    l.delta[offset] = state.truth[offset] - l.output[offset];
                }
                else{
                for (j = offset; j < offset+classes; ++j) {
                    if(state.truth[j]) state.truth[j] = iou;
                    l.delta[j] =  state.truth[j] - l.output[j];
                }
            }
        }
        }
        printf("Avg IOU: %f\n", avg_iou/count);
    }
}

 src/network.c

@@ -29,15 +29,26 @@
float get_current_rate(network net)
{
    int batch_num = get_current_batch(net);
    int i;
    float rate;
    switch (net.policy) {
        case CONSTANT:
            return net.learning_rate;
        case STEP:
            return net.learning_rate * pow(net.gamma, batch_num/net.step);
            return net.learning_rate * pow(net.scale, batch_num/net.step);
        case STEPS:
            rate = net.learning_rate;
            for(i = 0; i < net.num_steps; ++i){
                if(net.steps[i] > batch_num) return rate;
                rate *= net.scales[i];
            }
            return rate;
        case EXP:
            return net.learning_rate * pow(net.gamma, batch_num);
        case POLY:
            return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power);
        case SIG:
            return net.learning_rate * (1/(1+exp(net.gamma*(batch_num - net.step))));
        default:
            fprintf(stderr, "Policy is weird!\n");
            return net.learning_rate;

 src/network.h

@@ -8,7 +8,7 @@
#include "data.h"

typedef enum {
    CONSTANT, STEP, EXP, POLY
    CONSTANT, STEP, EXP, POLY, STEPS, SIG
} learning_rate_policy;

typedef struct {
@@ -25,9 +25,13 @@

    float learning_rate;
    float gamma;
    float scale;
    float power;
    int step;
    int max_batches;
    float *scales;
    int   *steps;
    int num_steps;

    int inputs;
    int h, w, c;

 src/parser.c

@@ -169,7 +169,7 @@
    int rescore = option_find_int(options, "rescore", 0);
    int joint = option_find_int(options, "joint", 0);
    int objectness = option_find_int(options, "objectness", 0);
    int background = 0;
    int background = option_find_int(options, "background", 0);
    detection_layer layer = make_detection_layer(params.batch, params.inputs, classes, coords, joint, rescore, background, objectness);
    return layer;
}
@@ -312,6 +312,8 @@
    if (strcmp(s, "constant")==0) return CONSTANT;
    if (strcmp(s, "step")==0) return STEP;
    if (strcmp(s, "exp")==0) return EXP;
    if (strcmp(s, "sigmoid")==0) return SIG;
    if (strcmp(s, "steps")==0) return STEPS;
    fprintf(stderr, "Couldn't find policy %s, going with constant\n", s);
    return CONSTANT;
}
@@ -337,9 +339,36 @@
    net->policy = get_policy(policy_s);
    if(net->policy == STEP){
        net->step = option_find_int(options, "step", 1);
        net->gamma = option_find_float(options, "gamma", 1);
        net->scale = option_find_float(options, "scale", 1);
    } else if (net->policy == STEPS){
        char *l = option_find(options, "steps");   
        char *p = option_find(options, "scales");   
        if(!l || !p) error("STEPS policy must have steps and scales in cfg file");

        int len = strlen(l);
        int n = 1;
        int i;
        for(i = 0; i < len; ++i){
            if (l[i] == ',') ++n;
        }
        int *steps = calloc(n, sizeof(int));
        float *scales = calloc(n, sizeof(float));
        for(i = 0; i < n; ++i){
            int step    = atoi(l);
            float scale = atof(p);
            l = strchr(l, ',')+1;
            p = strchr(p, ',')+1;
            steps[i] = step;
            scales[i] = scale;
        }
        net->scales = scales;
        net->steps = steps;
        net->num_steps = n;
    } else if (net->policy == EXP){
        net->gamma = option_find_float(options, "gamma", 1);
    } else if (net->policy == SIG){
        net->gamma = option_find_float(options, "gamma", 1);
        net->step = option_find_int(options, "step", 1);
    } else if (net->policy == POLY){
        net->power = option_find_float(options, "power", 1);
    }

 src/yolo.c

@@ -66,7 +66,6 @@
        load_weights(&net, weightfile);
    }
    detection_layer layer = get_network_detection_layer(net);
    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    int imgs = 128;
    int i = *net.seen/imgs;

@@ -75,10 +74,6 @@
    int N = plist->size;
    paths = (char **)list_to_array(plist);

    if(i*imgs > N*80){
        net.layers[net.n-1].joint = 1;
        net.layers[net.n-1].objectness = 0;
    }
    if(i*imgs > N*120){
        net.layers[net.n-1].rescore = 1;
    }
@@ -102,7 +97,7 @@

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;
    while(i*imgs < N*130){
    while(get_current_batch(net) < net.max_batches){
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
@@ -115,19 +110,10 @@
        if (avg_loss < 0) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;

        printf("%d: %f, %f avg, %lf seconds, %d images, epoch: %f\n", i, loss, avg_loss, sec(clock()-time), i*imgs, ((float)i)*imgs/N);

        if((i-1)*imgs <= N && i*imgs > N){
            fprintf(stderr, "First stage done\n");
            net.learning_rate *= 10;
            char buff[256];
            sprintf(buff, "%s/%s_first_stage.weights", backup_directory, base);
            save_weights(net, buff);
        }
        printf("%d: %f, %f avg, %lf seconds, %f rate, %d images, epoch: %f\n", get_current_batch(net), loss, avg_loss, sec(clock()-time), get_current_rate(net), *net.seen, (float)*net.seen/N);

        if((i-1)*imgs <= 80*N && i*imgs > N*80){
            fprintf(stderr, "Second stage done.\n");
            net.learning_rate *= .1;
            char buff[256];
            sprintf(buff, "%s/%s_second_stage.weights", backup_directory, base);
            save_weights(net, buff);

 src/yoloplus.c

New file
@@ -0,0 +1,334 @@
#include "network.h"
#include "detection_layer.h"
#include "cost_layer.h"
#include "utils.h"
#include "parser.h"
#include "box.h"

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#endif

/* Class labels indexed by class id: the drawing code prints voc_names[class]
 * for each detection, and the validator opens one result file per entry. */
char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};

/*
 * Draw the best-scoring class of every grid cell onto `im` and show it.
 *
 * im          image that receives the boxes (also passed to show_image)
 * box         flat prediction array, one record per cell in row-major order;
 *             each record is [objectness slot if enabled][20 class scores][x y w h]
 * side        number of cells along one edge of the grid
 * objectness  non-zero when each record starts with an extra slot; the class
 *             scores are then scaled by (1 - that slot)
 * label       window/label name forwarded to show_image
 * thresh      a cell is drawn only when its scaled best score exceeds this
 */
void draw_yoloplus(image im, float *box, int side, int objectness, char *label, float thresh)
{
    const int classes = 20;
    const int elems = 4 + classes + objectness;
    int row, col;

    for (row = 0; row < side; ++row) {
        for (col = 0; col < side; ++col) {
            /* Start of this cell's record. */
            float *cell = box + (row*side + col) * elems;
            float scale = 1;
            if (objectness) {
                scale = 1 - cell[0];
                cell += 1;
            }

            int best = max_index(cell, classes);
            if (!(scale * cell[best] > thresh)) continue;

            /* Line thickness grows with the square root of the score. */
            int width = sqrt(scale*cell[best])*5 + 1;
            printf("%f %s\n", scale * cell[best], voc_names[best]);
            float red = get_color(0,best,classes);
            float green = get_color(1,best,classes);
            float blue = get_color(2,best,classes);

            /* Box coordinates follow the class scores. */
            float *coords = cell + classes;
            float x = coords[0];
            float y = coords[1];
            /* Centre is stored relative to the cell; map it to [0,1] of the image. */
            x = (x+col)/side;
            y = (y+row)/side;
            /* Width and height are squared before use. */
            float w = coords[2];
            float h = coords[3];
            h = h*h;
            w = w*w;

            int left  = (x-w/2)*im.w;
            int right = (x+w/2)*im.w;
            int top   = (y-h/2)*im.h;
            int bot   = (y+h/2)*im.h;
            draw_box_width(im, left, top, right, bot, width, red, green, blue);
        }
    }
    show_image(im, label);
}

void train_yoloplus(char *cfgfile, char *weightfile)
{
    char *train_images = "/home/pjreddie/data/voc/test/train.txt";
    char *backup_directory = "/home/pjreddie/backup/";
    srand(time(0));
    data_seed = time(0);
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    float avg_loss = -1;
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    detection_layer layer = get_network_detection_layer(net);
    int imgs = 128;
    int i = *net.seen/imgs;

    char **paths;
    list *plist = get_paths(train_images);
    int N = plist->size;
    paths = (char **)list_to_array(plist);

    if(i*imgs > N*120){
        net.layers[net.n-1].rescore = 1;
    }
    data train, buffer;

    int classes = layer.classes;
    int background = layer.objectness;
    int side = sqrt(get_detection_layer_locations(layer));

    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.paths = paths;
    args.n = imgs;
    args.m = plist->size;
    args.classes = classes;
    args.num_boxes = side;
    args.background = background;
    args.d = &buffer;
    args.type = DETECTION_DATA;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;
    while(get_current_batch(net) < net.max_batches){
        i += 1;
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data_in_thread(args);

        printf("Loaded: %lf seconds\n", sec(clock()-time));
        time=clock();
        float loss = train_network(net, train);
        if (avg_loss < 0) avg_loss = loss;
        avg_loss = avg_loss*.9 + loss*.1;

        printf("%d: %f, %f avg, %lf seconds, %f rate, %d images, epoch: %f\n", get_current_batch(net), loss, avg_loss, sec(clock()-time), get_current_rate(net), *net.seen, (float)*net.seen/N);

        if((i-1)*imgs <= 80*N && i*imgs > N*80){
            fprintf(stderr, "Second stage done.\n");
            char buff[256];
            sprintf(buff, "%s/%s_second_stage.weights", backup_directory, base);
            save_weights(net, buff);
            net.layers[net.n-1].joint = 1;
            net.layers[net.n-1].objectness = 0;
            background = 0;

            pthread_join(load_thread, 0);
            free_data(buffer);
            args.background = background;
            load_thread = load_data_in_thread(args);
        }

        if((i-1)*imgs <= 120*N && i*imgs > N*120){
            fprintf(stderr, "Third stage done.\n");
            char buff[256];
            sprintf(buff, "%s/%s_final.weights", backup_directory, base);
            net.layers[net.n-1].rescore = 1;
            save_weights(net, buff);
        }

        if(i%1000==0){
            char buff[256];
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
        }
        free_data(train);
    }
    char buff[256];
    sprintf(buff, "%s/%s_rescore.weights", backup_directory, base);
    save_weights(net, buff);
}

/*
 * Turn the flat network output into per-cell class probabilities and boxes.
 *
 * predictions  flat array, one record per grid cell in row-major order:
 *              [one leading slot if background or objectness][classes scores][x y w h]
 * classes      number of class scores per cell
 * objectness   non-zero: class scores are scaled by (1 - leading slot)
 * background   non-zero: a leading slot is present (it is skipped, not used as a scale)
 * num_boxes    grid cells along one edge; num_boxes*num_boxes cells are converted
 * w, h         dimensions the boxes are scaled to
 * thresh       scaled scores not above this are stored as 0
 * probs        output, probs[cell][class]
 * boxes        output, one box per cell (x/y centre, w/h size)
 */
void convert_yoloplus_detections(float *predictions, int classes, int objectness, int background, int num_boxes, int w, int h, float thresh, float **probs, box *boxes)
{
    const int has_lead = (background || objectness);
    const int stride = 4 + classes + has_lead;
    const int total = num_boxes*num_boxes;
    int cell, k;

    for (cell = 0; cell < total; ++cell) {
        const float *record = predictions + cell*stride;
        const float *scores = record + has_lead;
        const float *coords = scores + classes;

        float scale = 1;
        if (objectness) scale = 1 - record[0];

        for (k = 0; k < classes; ++k) {
            float prob = scale * scores[k];
            if (prob > thresh) {
                probs[cell][k] = prob;
            } else {
                probs[cell][k] = 0;
            }
        }

        int row = cell / num_boxes;
        int col = cell % num_boxes;
        /* Centre is relative to the cell; width and height are squared. */
        boxes[cell].x = (coords[0] + col) / num_boxes * w;
        boxes[cell].y = (coords[1] + row) / num_boxes * h;
        boxes[cell].w = pow(coords[2], 2) * w;
        boxes[cell].h = pow(coords[3], 2) * h;
    }
}

/*
 * Write every non-zero detection to the result file of its class.
 *
 * fps        one open stream per class, indexed by class id
 * id         image identifier printed at the start of each line
 * boxes      one centre/size box per grid cell
 * probs      probs[cell][class]; entries equal to 0 are skipped
 * num_boxes  grid cells along one edge (num_boxes*num_boxes cells are visited)
 * classes    number of classes
 * w, h       image dimensions; box corners are clipped to [0,w] x [0,h]
 *
 * Each line is "<id> <prob> <xmin> <ymin> <xmax> <ymax>".
 */
void print_yoloplus_detections(FILE **fps, char *id, box *boxes, float **probs, int num_boxes, int classes, int w, int h)
{
    const int total = num_boxes*num_boxes;
    int cell, k;

    for (cell = 0; cell < total; ++cell) {
        /* Convert centre/size to corner coordinates. */
        float xmin = boxes[cell].x - boxes[cell].w/2.;
        float ymin = boxes[cell].y - boxes[cell].h/2.;
        float xmax = boxes[cell].x + boxes[cell].w/2.;
        float ymax = boxes[cell].y + boxes[cell].h/2.;

        /* Clip to the image. */
        if (xmin < 0) xmin = 0;
        if (ymin < 0) ymin = 0;
        if (xmax > w) xmax = w;
        if (ymax > h) ymax = h;

        for (k = 0; k < classes; ++k) {
            if (probs[cell][k] == 0) continue;
            fprintf(fps[k], "%s %f %f %f %f %f\n", id, probs[cell][k],
                    xmin, ymin, xmax, ymax);
        }
    }
}

void validate_yoloplus(char *cfgfile, char *weightfile)
{
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    detection_layer layer = get_network_detection_layer(net);
    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
    srand(time(0));

    char *base = "results/comp4_det_test_";
    list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt");
    char **paths = (char **)list_to_array(plist);

    int classes = layer.classes;
    int objectness = layer.objectness;
    int background = layer.background;
    int num_boxes = sqrt(get_detection_layer_locations(layer));

    int j;
    FILE **fps = calloc(classes, sizeof(FILE *));
    for(j = 0; j < classes; ++j){
        char buff[1024];
        snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
        fps[j] = fopen(buff, "w");
    }
    box *boxes = calloc(num_boxes*num_boxes, sizeof(box));
    float **probs = calloc(num_boxes*num_boxes, sizeof(float *));
    for(j = 0; j < num_boxes*num_boxes; ++j) probs[j] = calloc(classes, sizeof(float *));

    int m = plist->size;
    int i=0;
    int t;

    float thresh = .001;
    int nms = 1;
    float iou_thresh = .5;

    int nthreads = 8;
    image *val = calloc(nthreads, sizeof(image));
    image *val_resized = calloc(nthreads, sizeof(image));
    image *buf = calloc(nthreads, sizeof(image));
    image *buf_resized = calloc(nthreads, sizeof(image));
    pthread_t *thr = calloc(nthreads, sizeof(pthread_t));

    load_args args = {0};
    args.w = net.w;
    args.h = net.h;
    args.type = IMAGE_DATA;

    for(t = 0; t < nthreads; ++t){
        args.path = paths[i+t];
        args.im = &buf[t];
        args.resized = &buf_resized[t];
        thr[t] = load_data_in_thread(args);
    }
    time_t start = time(0);
    for(i = nthreads; i < m+nthreads; i += nthreads){
        fprintf(stderr, "%d\n", i);
        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
            pthread_join(thr[t], 0);
            val[t] = buf[t];
            val_resized[t] = buf_resized[t];
        }
        for(t = 0; t < nthreads && i+t < m; ++t){
            args.path = paths[i+t];
            args.im = &buf[t];
            args.resized = &buf_resized[t];
            thr[t] = load_data_in_thread(args);
        }
        for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
            char *path = paths[i+t-nthreads];
            char *id = basecfg(path);
            float *X = val_resized[t].data;
            float *predictions = network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
            convert_yoloplus_detections(predictions, classes, objectness, background, num_boxes, w, h, thresh, probs, boxes);
            if (nms) do_nms(boxes, probs, num_boxes*num_boxes, classes, iou_thresh);
            print_yoloplus_detections(fps, id, boxes, probs, num_boxes, classes, w, h);
            free(id);
            free_image(val[t]);
            free_image(val_resized[t]);
        }
    }
    fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));
}

void test_yoloplus(char *cfgfile, char *weightfile, char *filename, float thresh)
{

    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    detection_layer layer = get_network_detection_layer(net);
    set_batch_network(&net, 1);
    srand(2222222);
    clock_t time;
    char input[256];
    while(1){
        if(filename){
            strncpy(input, filename, 256);
        } else {
            printf("Enter Image Path: ");
            fflush(stdout);
            fgets(input, 256, stdin);
            strtok(input, "\n");
        }
        image im = load_image_color(input,0,0);
        image sized = resize_image(im, net.w, net.h);
        float *X = sized.data;
        time=clock();
        float *predictions = network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        draw_yoloplus(im, predictions, 7, layer.objectness, "predictions", thresh);
        free_image(im);
        free_image(sized);
#ifdef OPENCV
        cvWaitKey(0);
        cvDestroyAllWindows();
#endif
        if (filename) break;
    }
}

/*
 * Command-line entry point for the "yoloplus" sub-command.
 *
 * Expected arguments: argv[2] = train | test | valid, argv[3] = cfg file,
 * argv[4] = weights file (optional), argv[5] = image file (optional, test only).
 * An optional "-thresh <value>" sets the detection threshold (default .2).
 * With too few arguments, or an unrecognised mode, a usage line is printed
 * to stderr and nothing is run.
 */
void run_yoloplus(int argc, char **argv)
{
    /* Read the option first, before the positional arguments are picked up. */
    float thresh = find_float_arg(argc, argv, "-thresh", .2);
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
    }

    char *cfg = argv[3];
    char *weights = (argc > 4) ? argv[4] : 0;
    char *filename = (argc > 5) ? argv[5]: 0;
    if(0==strcmp(argv[2], "test")) test_yoloplus(cfg, weights, filename, thresh);
    else if(0==strcmp(argv[2], "train")) train_yoloplus(cfg, weights);
    else if(0==strcmp(argv[2], "valid")) validate_yoloplus(cfg, weights);
    else {
        /* Previously an unknown mode did nothing at all, with no diagnostic. */
        fprintf(stderr, "Unknown %s command: %s\n", argv[1], argv[2]);
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
    }
}

			@@ -34,7 +34,7 @@
			LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
			endif

			OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o region_layer.o layer.o compare.o
			OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o region_layer.o layer.o compare.o yoloplus.o
			ifeq ($(GPU), 1)
			OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
			endif

			@@ -27,7 +27,7 @@
			activation=leaky

			[maxpool]
			size=3
			size=2
			stride=2

			[convolutional]
			@@ -38,7 +38,7 @@
			activation=leaky

			[maxpool]
			size=3
			size=2
			stride=2

			[convolutional]
			@@ -49,7 +49,7 @@
			activation=leaky

			[maxpool]
			size=3
			size=2
			stride=2

			[convolutional]
			@@ -60,7 +60,7 @@
			activation=leaky

			[maxpool]
			size=3
			size=2
			stride=2

			[convolutional]
			@@ -71,7 +71,7 @@
			activation=leaky

			[maxpool]
			size=3
			size=2
			stride=2

			[convolutional]
			@@ -82,7 +82,7 @@
			activation=leaky

			[maxpool]
			size=3
			size=2
			stride=2

			[convolutional]
			@@ -99,7 +99,7 @@

			[connected]
			output=1000
			activation=linear
			activation=leaky

			[softmax]

			@@ -4,10 +4,15 @@
			height=448
			width=448
			channels=3
			learning_rate=0.01
			learning_rate=0.001
			momentum=0.9
			decay=0.0005

			policy=steps
			steps=50, 5000
			scales=10, .1
			max_batches = 8000

			[crop]
			crop_width=448
			crop_height=448

			@@ -13,6 +13,7 @@

			extern void run_imagenet(int argc, char **argv);
			extern void run_yolo(int argc, char **argv);
			extern void run_yoloplus(int argc, char **argv);
			extern void run_coco(int argc, char **argv);
			extern void run_writing(int argc, char **argv);
			extern void run_captcha(int argc, char **argv);
			@@ -178,6 +179,8 @@
			average(argc, argv);
			} else if (0 == strcmp(argv[1], "yolo")){
			run_yolo(argc, argv);
			} else if (0 == strcmp(argv[1], "yoloplus")){
			run_yoloplus(argc, argv);
			} else if (0 == strcmp(argv[1], "coco")){
			run_coco(argc, argv);
			} else if (0 == strcmp(argv[1], "compare")){

			@@ -85,11 +85,12 @@
			int size = get_detection_layer_output_size(l) * l.batch;
			memset(l.delta, 0, size * sizeof(float));
			for (i = 0; i < l.batch*locations; ++i) {
			int classes = l.objectness+l.classes;
			int classes = (l.objectness || l.background)+l.classes;
			int offset = i*(classes+l.coords);
			for (j = offset; j < offset+classes; ++j) {
			*(l.cost) += pow(state.truth[j] - l.output[j], 2);
			l.delta[j] = state.truth[j] - l.output[j];
			if(l.background && j == offset) l.delta[j] *= .1;
			}

			box truth;
			@@ -115,12 +116,18 @@
			l.delta[j+2] = 4 * (state.truth[j+2] - l.output[j+2]);
			l.delta[j+3] = 4 * (state.truth[j+3] - l.output[j+3]);
			if(l.rescore){
			if(l.objectness){
			state.truth[offset] = iou;
			l.delta[offset] = state.truth[offset] - l.output[offset];
			}
			else{
			for (j = offset; j < offset+classes; ++j) {
			if(state.truth[j]) state.truth[j] = iou;
			l.delta[j] = state.truth[j] - l.output[j];
			}
			}
			}
			}
			printf("Avg IOU: %f\n", avg_iou/count);
			}
			}

			@@ -29,15 +29,26 @@
			float get_current_rate(network net)
			{
			int batch_num = get_current_batch(net);
			int i;
			float rate;
			switch (net.policy) {
			case CONSTANT:
			return net.learning_rate;
			case STEP:
			return net.learning_rate * pow(net.gamma, batch_num/net.step);
			return net.learning_rate * pow(net.scale, batch_num/net.step);
			case STEPS:
			rate = net.learning_rate;
			for(i = 0; i < net.num_steps; ++i){
			if(net.steps[i] > batch_num) return rate;
			rate *= net.scales[i];
			}
			return rate;
			case EXP:
			return net.learning_rate * pow(net.gamma, batch_num);
			case POLY:
			return net.learning_rate * pow(1 - (float)batch_num / net.max_batches, net.power);
			case SIG:
			return net.learning_rate * (1/(1+exp(net.gamma*(batch_num - net.step))));
			default:
			fprintf(stderr, "Policy is weird!\n");
			return net.learning_rate;

			@@ -8,7 +8,7 @@
			#include "data.h"

			typedef enum {
			CONSTANT, STEP, EXP, POLY
			CONSTANT, STEP, EXP, POLY, STEPS, SIG
			} learning_rate_policy;

			typedef struct {
			@@ -25,9 +25,13 @@

			float learning_rate;
			float gamma;
			float scale;
			float power;
			int step;
			int max_batches;
			float *scales;
			int *steps;
			int num_steps;

			int inputs;
			int h, w, c;

			@@ -169,7 +169,7 @@
			int rescore = option_find_int(options, "rescore", 0);
			int joint = option_find_int(options, "joint", 0);
			int objectness = option_find_int(options, "objectness", 0);
			int background = 0;
			int background = option_find_int(options, "background", 0);
			detection_layer layer = make_detection_layer(params.batch, params.inputs, classes, coords, joint, rescore, background, objectness);
			return layer;
			}
			@@ -312,6 +312,8 @@
			if (strcmp(s, "constant")==0) return CONSTANT;
			if (strcmp(s, "step")==0) return STEP;
			if (strcmp(s, "exp")==0) return EXP;
			if (strcmp(s, "sigmoid")==0) return SIG;
			if (strcmp(s, "steps")==0) return STEPS;
			fprintf(stderr, "Couldn't find policy %s, going with constant\n", s);
			return CONSTANT;
			}
			@@ -337,9 +339,36 @@
			net->policy = get_policy(policy_s);
			if(net->policy == STEP){
			net->step = option_find_int(options, "step", 1);
			net->gamma = option_find_float(options, "gamma", 1);
			net->scale = option_find_float(options, "scale", 1);
			} else if (net->policy == STEPS){
			char *l = option_find(options, "steps");
			char *p = option_find(options, "scales");
			if(!l || !p) error("STEPS policy must have steps and scales in cfg file");

			int len = strlen(l);
			int n = 1;
			int i;
			for(i = 0; i < len; ++i){
			if (l[i] == ',') ++n;
			}
			int *steps = calloc(n, sizeof(int));
			float *scales = calloc(n, sizeof(float));
			for(i = 0; i < n; ++i){
			int step = atoi(l);
			float scale = atof(p);
			l = strchr(l, ',')+1;
			p = strchr(p, ',')+1;
			steps[i] = step;
			scales[i] = scale;
			}
			net->scales = scales;
			net->steps = steps;
			net->num_steps = n;
			} else if (net->policy == EXP){
			net->gamma = option_find_float(options, "gamma", 1);
			} else if (net->policy == SIG){
			net->gamma = option_find_float(options, "gamma", 1);
			net->step = option_find_int(options, "step", 1);
			} else if (net->policy == POLY){
			net->power = option_find_float(options, "power", 1);
			}

			@@ -66,7 +66,6 @@
			load_weights(&net, weightfile);
			}
			detection_layer layer = get_network_detection_layer(net);
			printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
			int imgs = 128;
			int i = *net.seen/imgs;

			@@ -75,10 +74,6 @@
			int N = plist->size;
			paths = (char **)list_to_array(plist);

			if(i*imgs > N*80){
			net.layers[net.n-1].joint = 1;
			net.layers[net.n-1].objectness = 0;
			}
			if(i*imgs > N*120){
			net.layers[net.n-1].rescore = 1;
			}
			@@ -102,7 +97,7 @@

			pthread_t load_thread = load_data_in_thread(args);
			clock_t time;
			while(i*imgs < N*130){
			while(get_current_batch(net) < net.max_batches){
			i += 1;
			time=clock();
			pthread_join(load_thread, 0);
			@@ -115,19 +110,10 @@
			if (avg_loss < 0) avg_loss = loss;
			avg_loss = avg_loss*.9 + loss*.1;

			printf("%d: %f, %f avg, %lf seconds, %d images, epoch: %f\n", i, loss, avg_loss, sec(clock()-time), i*imgs, ((float)i)*imgs/N);

			if((i-1)*imgs <= N && i*imgs > N){
			fprintf(stderr, "First stage done\n");
			net.learning_rate *= 10;
			char buff[256];
			sprintf(buff, "%s/%s_first_stage.weights", backup_directory, base);
			save_weights(net, buff);
			}
			printf("%d: %f, %f avg, %lf seconds, %f rate, %d images, epoch: %f\n", get_current_batch(net), loss, avg_loss, sec(clock()-time), get_current_rate(net), *net.seen, (float)*net.seen/N);

			if((i-1)*imgs <= 80*N && i*imgs > N*80){
			fprintf(stderr, "Second stage done.\n");
			net.learning_rate *= .1;
			char buff[256];
			sprintf(buff, "%s/%s_second_stage.weights", backup_directory, base);
			save_weights(net, buff);

New file
			@@ -0,0 +1,334 @@
			#include "network.h"
			#include "detection_layer.h"
			#include "cost_layer.h"
			#include "utils.h"
			#include "parser.h"
			#include "box.h"

			#ifdef OPENCV
			#include "opencv2/highgui/highgui_c.h"
			#endif

			char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};

			void draw_yoloplus(image im, float *box, int side, int objectness, char *label, float thresh)
			{
			int classes = 20;
			int elems = 4+classes+objectness;
			int j;
			int r, c;

			for(r = 0; r < side; ++r){
			for(c = 0; c < side; ++c){
			j = (r*side + c) * elems;
			float scale = 1;
			if(objectness) scale = 1 - box[j++];
			int class = max_index(box+j, classes);
			if(scale * box[j+class] > thresh){
			int width = sqrt(scale*box[j+class])*5 + 1;
			printf("%f %s\n", scale * box[j+class], voc_names[class]);
			float red = get_color(0,class,classes);
			float green = get_color(1,class,classes);
			float blue = get_color(2,class,classes);

			j += classes;
			float x = box[j+0];
			float y = box[j+1];
			x = (x+c)/side;
			y = (y+r)/side;
			float w = box[j+2]; //*maxwidth;
			float h = box[j+3]; //*maxheight;
			h = h*h;
			w = w*w;

			int left = (x-w/2)*im.w;
			int right = (x+w/2)*im.w;
			int top = (y-h/2)*im.h;
			int bot = (y+h/2)*im.h;
			draw_box_width(im, left, top, right, bot, width, red, green, blue);
			}
			}
			}
			show_image(im, label);
			}

			/*
			 * Train the detection network described by `cfgfile`.
			 *
			 * cfgfile    - network configuration file
			 * weightfile - optional weights to start from (may be NULL)
			 *
			 * Batches of 128 images are loaded by a background thread while the
			 * previous batch is trained on. Weights are saved into the hard-coded
			 * backup directory:
			 *   - once 80*N images have been seen ("_second_stage"); at that point the
			 *     last layer switches to joint = 1 / objectness = 0 and the loader is
			 *     restarted with background = 0,
			 *   - once 120*N images have been seen ("_final"), where rescore is enabled,
			 *   - every 1000 iterations, and
			 *   - after the loop ends ("_rescore").
			 * N is the number of paths listed in the training list file.
			 */
			void train_yoloplus(char *cfgfile, char *weightfile)
			{
				char *train_images = "/home/pjreddie/data/voc/test/train.txt";
				char *backup_directory = "/home/pjreddie/backup/";
				srand(time(0));
				data_seed = time(0);
				char *base = basecfg(cfgfile);
				printf("%s\n", base);
				float avg_loss = -1;
				network net = parse_network_cfg(cfgfile);
				if(weightfile){
					load_weights(&net, weightfile);
				}
				detection_layer layer = get_network_detection_layer(net);
				int imgs = 128;
				int i = *net.seen/imgs;

				char **paths;
				list *plist = get_paths(train_images);
				int N = plist->size;
				paths = (char **)list_to_array(plist);

				// Resuming past the third stage: rescoring is already active.
				if(i*imgs > N*120){
					net.layers[net.n-1].rescore = 1;
				}
				data train, buffer;

				int classes = layer.classes;
				int background = layer.objectness;
				int side = sqrt(get_detection_layer_locations(layer));

				load_args args = {0};
				args.w = net.w;
				args.h = net.h;
				args.paths = paths;
				args.n = imgs;
				args.m = plist->size;
				args.classes = classes;
				args.num_boxes = side;
				args.background = background;
				args.d = &buffer;
				args.type = DETECTION_DATA;

				pthread_t load_thread = load_data_in_thread(args);
				clock_t start;
				char buff[256];
				while(get_current_batch(net) < net.max_batches){
					i += 1;
					start = clock();
					pthread_join(load_thread, 0);
					train = buffer;
					// Start fetching the next batch while this one is trained on.
					load_thread = load_data_in_thread(args);

					printf("Loaded: %lf seconds\n", sec(clock()-start));
					start = clock();
					float loss = train_network(net, train);
					if (avg_loss < 0) avg_loss = loss;
					avg_loss = avg_loss*.9 + loss*.1;

					printf("%d: %f, %f avg, %lf seconds, %f rate, %d images, epoch: %f\n", get_current_batch(net), loss, avg_loss, sec(clock()-start), get_current_rate(net), *net.seen, (float)*net.seen/N);

					if((i-1)*imgs <= 80*N && i*imgs > N*80){
						fprintf(stderr, "Second stage done.\n");
						snprintf(buff, sizeof buff, "%s/%s_second_stage.weights", backup_directory, base);
						save_weights(net, buff);
						net.layers[net.n-1].joint = 1;
						net.layers[net.n-1].objectness = 0;
						background = 0;

						// The batch already in flight was built with the old
						// background setting: discard it and reload.
						pthread_join(load_thread, 0);
						free_data(buffer);
						args.background = background;
						load_thread = load_data_in_thread(args);
					}

					if((i-1)*imgs <= 120*N && i*imgs > N*120){
						fprintf(stderr, "Third stage done.\n");
						snprintf(buff, sizeof buff, "%s/%s_final.weights", backup_directory, base);
						net.layers[net.n-1].rescore = 1;
						save_weights(net, buff);
					}

					if(i%1000==0){
						snprintf(buff, sizeof buff, "%s/%s_%d.weights", backup_directory, base, i);
						save_weights(net, buff);
					}
					free_data(train);
				}

				// A loader thread is still running and writes into the local
				// `buffer`; wait for it and release its batch before returning.
				pthread_join(load_thread, 0);
				free_data(buffer);

				snprintf(buff, sizeof buff, "%s/%s_rescore.weights", backup_directory, base);
				save_weights(net, buff);
				free(base);
			}

			/*
			 * Turn the raw network output into per-cell boxes and class probabilities.
			 *
			 * predictions - flat network output; each of the num_boxes*num_boxes cells
			 *               holds an optional leading value (present when `background`
			 *               or `objectness` is set), `classes` scores, then x, y, w, h
			 * classes     - number of class scores per cell
			 * objectness  - when non-zero, scores are scaled by (1 - leading value)
			 * background  - when non-zero, a leading value is present (and skipped)
			 * num_boxes   - grid cells per row and per column
			 * w, h        - image size the boxes are scaled to
			 * thresh      - probabilities not above this are written as 0
			 * probs       - out: probs[cell][class]
			 * boxes       - out: boxes[cell], centre x/y and width/height in pixels
			 */
			void convert_yoloplus_detections(float *predictions, int classes, int objectness, int background, int num_boxes, int w, int h, float thresh, float **probs, box *boxes)
			{
				int i,j;
				int per_box = 4+classes+(background || objectness);
				for (i = 0; i < num_boxes*num_boxes; ++i){
					float scale = 1;
					if(objectness) scale = 1-predictions[i*per_box];
					int offset = i*per_box+(background||objectness);
					for(j = 0; j < classes; ++j){
						float prob = scale*predictions[offset+j];
						probs[i][j] = (prob > thresh) ? prob : 0;
					}
					int row = i / num_boxes;
					int col = i % num_boxes;
					offset += classes;
					// x/y are offsets inside the cell; w/h are stored as square roots.
					boxes[i].x = (predictions[offset + 0] + col) / num_boxes * w;
					boxes[i].y = (predictions[offset + 1] + row) / num_boxes * h;
					boxes[i].w = pow(predictions[offset + 2], 2) * w;
					boxes[i].h = pow(predictions[offset + 3], 2) * h;
				}
			}

			/*
			 * Write the detections of one image to the per-class result streams.
			 *
			 * fps       - one open stream per class, indexed by class id
			 * id        - image identifier printed at the start of every line
			 * boxes     - boxes[cell] with centre x/y and width/height
			 * probs     - probs[cell][class]; entries equal to 0 are not printed
			 * num_boxes - grid cells per row and per column
			 * classes   - number of classes
			 * w, h      - image size; box corners are clamped to [0,w] x [0,h]
			 *
			 * Each printed line is "id prob xmin ymin xmax ymax".
			 */
			void print_yoloplus_detections(FILE **fps, char *id, box *boxes, float **probs, int num_boxes, int classes, int w, int h)
			{
				int i, j;
				for(i = 0; i < num_boxes*num_boxes; ++i){
					float xmin = boxes[i].x - boxes[i].w/2.;
					float xmax = boxes[i].x + boxes[i].w/2.;
					float ymin = boxes[i].y - boxes[i].h/2.;
					float ymax = boxes[i].y + boxes[i].h/2.;

					if (xmin < 0) xmin = 0;
					if (ymin < 0) ymin = 0;
					if (xmax > w) xmax = w;
					if (ymax > h) ymax = h;

					for(j = 0; j < classes; ++j){
						if (probs[i][j]) fprintf(fps[j], "%s %f %f %f %f %f\n", id, probs[i][j],
								xmin, ymin, xmax, ymax);
					}
				}
			}

			/*
			 * Run the network over the hard-coded test list and write detections to
			 * one file per class ("results/comp4_det_test_<class>.txt").
			 *
			 * cfgfile    - network configuration file
			 * weightfile - optional weights file (may be NULL)
			 *
			 * Images are loaded `nthreads` at a time by background threads while the
			 * previously loaded group is being evaluated. If a result file cannot be
			 * opened, an error is printed and the function returns without evaluating.
			 */
			void validate_yoloplus(char *cfgfile, char *weightfile)
			{
				network net = parse_network_cfg(cfgfile);
				if(weightfile){
					load_weights(&net, weightfile);
				}
				set_batch_network(&net, 1);
				detection_layer layer = get_network_detection_layer(net);
				fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
				srand(time(0));

				char *base = "results/comp4_det_test_";
				list *plist = get_paths("/home/pjreddie/data/voc/test/2007_test.txt");
				char **paths = (char **)list_to_array(plist);

				int classes = layer.classes;
				int objectness = layer.objectness;
				int background = layer.background;
				int num_boxes = sqrt(get_detection_layer_locations(layer));
				int total = num_boxes*num_boxes;

				int m = plist->size;
				int i=0;
				int j;
				int t;

				float thresh = .001;
				int nms = 1;
				float iou_thresh = .5;

				int nthreads = 8;
				time_t start;

				load_args args = {0};
				args.w = net.w;
				args.h = net.h;
				args.type = IMAGE_DATA;

				box *boxes = calloc(total, sizeof(box));
				float **probs = calloc(total, sizeof(float *));
				for(j = 0; j < total; ++j) probs[j] = calloc(classes, sizeof(float));

				image *val = calloc(nthreads, sizeof(image));
				image *val_resized = calloc(nthreads, sizeof(image));
				image *buf = calloc(nthreads, sizeof(image));
				image *buf_resized = calloc(nthreads, sizeof(image));
				pthread_t *thr = calloc(nthreads, sizeof(pthread_t));

				FILE **fps = calloc(classes, sizeof(FILE *));
				for(j = 0; j < classes; ++j){
					char buff[1024];
					snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
					fps[j] = fopen(buff, "w");
					if(!fps[j]){
						// No loader thread has been started yet, so it is safe to bail out.
						fprintf(stderr, "Couldn't open file: %s\n", buff);
						goto cleanup;
					}
				}

				// Prime the pipeline; never start more loaders than there are images.
				for(t = 0; t < nthreads && t < m; ++t){
					args.path = paths[i+t];
					args.im = &buf[t];
					args.resized = &buf_resized[t];
					thr[t] = load_data_in_thread(args);
				}
				start = time(0);
				for(i = nthreads; i < m+nthreads; i += nthreads){
					fprintf(stderr, "%d\n", i);
					// Collect the group that was loading during the previous pass.
					for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
						pthread_join(thr[t], 0);
						val[t] = buf[t];
						val_resized[t] = buf_resized[t];
					}
					// Kick off loading of the next group.
					for(t = 0; t < nthreads && i+t < m; ++t){
						args.path = paths[i+t];
						args.im = &buf[t];
						args.resized = &buf_resized[t];
						thr[t] = load_data_in_thread(args);
					}
					for(t = 0; t < nthreads && i+t-nthreads < m; ++t){
						char *path = paths[i+t-nthreads];
						char *id = basecfg(path);
						float *X = val_resized[t].data;
						float *predictions = network_predict(net, X);
						int w = val[t].w;
						int h = val[t].h;
						convert_yoloplus_detections(predictions, classes, objectness, background, num_boxes, w, h, thresh, probs, boxes);
						if (nms) do_nms(boxes, probs, total, classes, iou_thresh);
						print_yoloplus_detections(fps, id, boxes, probs, num_boxes, classes, w, h);
						free(id);
						free_image(val[t]);
						free_image(val_resized[t]);
					}
				}
				fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start));

			cleanup:
				// Close the result files so buffered output is flushed, then
				// release everything this function allocated.
				for(j = 0; j < classes; ++j){
					if(fps[j]) fclose(fps[j]);
				}
				free(fps);
				for(j = 0; j < total; ++j) free(probs[j]);
				free(probs);
				free(boxes);
				free(val);
				free(val_resized);
				free(buf);
				free(buf_resized);
				free(thr);
			}

			/*
			 * Run the detector on one image, or interactively on images typed on stdin,
			 * and display the detections.
			 *
			 * cfgfile    - network configuration file
			 * weightfile - optional weights file (may be NULL)
			 * filename   - image to process; when NULL, paths are read from stdin
			 *              until end of input
			 * thresh     - score threshold passed on to draw_yoloplus
			 */
			void test_yoloplus(char *cfgfile, char *weightfile, char *filename, float thresh)
			{

				network net = parse_network_cfg(cfgfile);
				if(weightfile){
					load_weights(&net, weightfile);
				}
				detection_layer layer = get_network_detection_layer(net);
				set_batch_network(&net, 1);
				// Grid size comes from the layer, as in training and validation.
				int side = sqrt(get_detection_layer_locations(layer));
				srand(2222222);
				clock_t start;
				char input[256];
				while(1){
					if(filename){
						// snprintf always NUL-terminates, unlike strncpy.
						snprintf(input, sizeof input, "%s", filename);
					} else {
						printf("Enter Image Path: ");
						fflush(stdout);
						// Stop on EOF or read error instead of reusing stale input.
						if(!fgets(input, sizeof input, stdin)) break;
						input[strcspn(input, "\n")] = '\0';
						if(!input[0]) continue;
					}
					image im = load_image_color(input,0,0);
					image sized = resize_image(im, net.w, net.h);
					float *X = sized.data;
					start = clock();
					float *predictions = network_predict(net, X);
					printf("%s: Predicted in %f seconds.\n", input, sec(clock()-start));
					draw_yoloplus(im, predictions, side, layer.objectness, "predictions", thresh);
					free_image(im);
					free_image(sized);
			#ifdef OPENCV
					cvWaitKey(0);
					cvDestroyAllWindows();
			#endif
					if (filename) break;
				}
			}

			/*
			 * Command-line entry point for the yoloplus sub-commands.
			 *
			 * argv[2] selects the mode ("test", "train" or "valid"), argv[3] is the
			 * cfg file, argv[4] an optional weights file and argv[5] an optional file
			 * name that is only forwarded in test mode. The "-thresh" option is read
			 * through find_float_arg with a default of .2. Any other mode string is
			 * ignored.
			 */
			void run_yoloplus(int argc, char **argv)
			{
				// Must run before the positional arguments are read.
				float thresh = find_float_arg(argc, argv, "-thresh", .2);

				if(argc < 4){
					fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
					return;
				}

				char *mode = argv[2];
				char *cfg = argv[3];
				char *weights = 0;
				char *filename = 0;
				if(argc > 4) weights = argv[4];
				if(argc > 5) filename = argv[5];

				if(strcmp(mode, "test") == 0){
					test_yoloplus(cfg, weights, filename, thresh);
				} else if(strcmp(mode, "train") == 0){
					train_yoloplus(cfg, weights);
				} else if(strcmp(mode, "valid") == 0){
					validate_yoloplus(cfg, weights);
				}
			}