~speedprog/mtg/mtg_card_detector.git

parent: 75fe6037 | patch | commit | ignore whitespace

Joseph Redmon

2016-11-27 b3c4fc9f223d9b6f50a1652d8d116fcdcc16f2e8

:fire: ARE YOU NOT ENTERTAINED :fire:

4 files modified

2 files added

	cfg/yolo-tiny.cfg	134 ●●●●● patch | view | raw | blame | history
	cfg/yolo-tiny_voc.cfg	134 ●●●●● patch | view | raw | blame | history
	src/demo.c	2 ●●●●● patch | view | raw | blame | history
	src/detector.c	63 ●●●●● patch | view | raw | blame | history
	src/region_layer.c	19 ●●●●● patch | view | raw | blame | history
	src/region_layer.h	2 ●●●●● patch | view | raw | blame | history

 cfg/yolo-tiny.cfg

New file
@@ -0,0 +1,134 @@
[net]
batch=64
subdivisions=8
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
max_batches = 120000
policy=steps
steps=-1,100,80000,100000
scales=.1,10,.1,.1

[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=1

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

###########

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=425
activation=linear

[region]
anchors = 0.738768,0.874946,  2.42204,2.65704,  4.30971,7.04493,  10.246,4.59428,  12.6868,11.8741
bias_match=1
classes=80
coords=4
num=5
softmax=1
jitter=.2
rescore=1

object_scale=5
noobject_scale=1
class_scale=1
coord_scale=1

absolute=1
thresh = .6
random=1

 cfg/yolo-tiny_voc.cfg

New file
@@ -0,0 +1,134 @@
[net]
batch=64
subdivisions=8
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1

learning_rate=0.001
max_batches = 40100
policy=steps
steps=-1,100,20000,30000
scales=.1,10,.1,.1

[convolutional]
batch_normalize=1
filters=16
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=1

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

###########

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
size=1
stride=1
pad=1
filters=125
activation=linear

[region]
anchors = 1.08,1.19,  3.42,4.41,  6.63,11.38,  9.42,5.11,  16.62,10.52
bias_match=1
classes=20
coords=4
num=5
softmax=1
jitter=.2
rescore=1

object_scale=5
noobject_scale=1
class_scale=1
coord_scale=1

absolute=1
thresh = .6
random=1

 src/demo.c

@@ -63,7 +63,7 @@
    if(l.type == DETECTION){
        get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
    } else if (l.type == REGION){
        get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
        get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0, 0);
    } else {
        error("Last layer must produce detections\n");
    }

 src/detector.c

@@ -66,7 +66,7 @@
    args.num_boxes = l.max_boxes;
    args.d = &buffer;
    args.type = DETECTION_DATA;
    args.threads = 4;
    args.threads = 8;

    args.angle = net.angle;
    args.exposure = net.exposure;
@@ -81,6 +81,7 @@
        if(l.random && count++%10 == 0){
            printf("Resizing\n");
            int dim = (rand() % 10 + 10) * 32;
            if (get_current_batch(net)+100 > net.max_batches) dim = 544;
            //int dim = (rand() % 4 + 16) * 32;
            printf("%d\n", dim);
            args.w = dim;
@@ -208,7 +209,7 @@
    }
}

void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int total, int classes, int w, int h, int *map)
void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int total, int classes, int w, int h)
{
    int i, j;
    for(i = 0; i < total; ++i){
@@ -224,7 +225,6 @@

        for(j = 0; j < classes; ++j){
            int class = j;
            if (map) class = map[j];
            if (probs[i][class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, probs[i][class],
                    xmin, ymin, xmax, ymax);
        }
@@ -233,6 +233,7 @@

void validate_detector(char *datacfg, char *cfgfile, char *weightfile)
{
    int j;
    list *options = read_data_cfg(datacfg);
    char *valid_images = option_find_str(options, "valid", "data/train.list");
    char *name_list = option_find_str(options, "names", "data/names.list");
@@ -242,23 +243,6 @@
    int *map = 0;
    if (mapf) map = read_map(mapf);


    char buff[1024];
    char *type = option_find_str(options, "eval", "voc");
    FILE *fp = 0;
    int coco = 0;
    int imagenet = 0;
    if(0==strcmp(type, "coco")){
        snprintf(buff, 1024, "%s/coco_results.json", prefix);
        fp = fopen(buff, "w");
        fprintf(fp, "[\n");
        coco = 1;
    } else if(0==strcmp(type, "imagenet")){
        snprintf(buff, 1024, "%s/imagenet-detection.txt", prefix);
        fp = fopen(buff, "w");
        imagenet = 1;
    }

    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
@@ -274,12 +258,31 @@
    layer l = net.layers[net.n-1];
    int classes = l.classes;

    int j;
    FILE **fps = calloc(classes, sizeof(FILE *));
    for(j = 0; j < classes; ++j){
        snprintf(buff, 1024, "%s/%s%s.txt", prefix, base, names[j]);
        fps[j] = fopen(buff, "w");
    char buff[1024];
    char *type = option_find_str(options, "eval", "voc");
    FILE *fp = 0;
    FILE **fps = 0;
    int coco = 0;
    int imagenet = 0;
    if(0==strcmp(type, "coco")){
        snprintf(buff, 1024, "%s/coco_results.json", prefix);
        fp = fopen(buff, "w");
        fprintf(fp, "[\n");
        coco = 1;
    } else if(0==strcmp(type, "imagenet")){
        snprintf(buff, 1024, "%s/imagenet-detection.txt", prefix);
        fp = fopen(buff, "w");
        imagenet = 1;
        classes = 200;
    } else {
        fps = calloc(classes, sizeof(FILE *));
        for(j = 0; j < classes; ++j){
            snprintf(buff, 1024, "%s/%s%s.txt", prefix, base, names[j]);
            fps[j] = fopen(buff, "w");
        }
    }


    box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
    float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
    for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
@@ -330,12 +333,12 @@
            network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
            get_region_boxes(l, w, h, thresh, probs, boxes, 0);
            get_region_boxes(l, w, h, thresh, probs, boxes, 0, map);
            if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
            if (coco){
                print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
            } else if (imagenet){
                print_imagenet_detections(fp, i+t-nthreads+1 + 9741, boxes, probs, l.w*l.h*l.n, 200, w, h, map);
                print_imagenet_detections(fp, i+t-nthreads+1, boxes, probs, l.w*l.h*l.n, classes, w, h);
            } else {
                print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h);
            }
@@ -345,7 +348,7 @@
        }
    }
    for(j = 0; j < classes; ++j){
        fclose(fps[j]);
        if(fps) fclose(fps[j]);
    }
    if(coco){
        fseek(fp, -2, SEEK_CUR); 
@@ -394,7 +397,7 @@
        image sized = resize_image(orig, net.w, net.h);
        char *id = basecfg(path);
        network_predict(net, sized.data);
        get_region_boxes(l, 1, 1, thresh, probs, boxes, 1);
        get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0);
        if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);

        char labelpath[4096];
@@ -473,7 +476,7 @@
        time=clock();
        network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        get_region_boxes(l, 1, 1, thresh, probs, boxes, 0);
        get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0);
        if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
        save_image(im, "predictions");

 src/region_layer.c

@@ -196,7 +196,8 @@
                if(truth.x > 100000 && truth.y > 100000){
                    for(n = 0; n < l.n*l.w*l.h; ++n){
                        int index = size*n + b*l.outputs + 5;
                        float p = get_hierarchy_probability(l.output + index, l.softmax_tree, class);
                        float scale =  l.output[index-1];
                        float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class);
                        if(p > maxp){
                            maxp = p;
                            maxi = n;
@@ -324,7 +325,7 @@
    axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
}

void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map)
{
    int i,j,n;
    float *predictions = l.output;
@@ -348,8 +349,13 @@

                hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);
                int found = 0;
                for(j = l.classes - 1; j >= 0; --j){
                    if(1){
                if(map){
                    for(j = 0; j < 200; ++j){
                        float prob = scale*predictions[class_index+map[j]];
                        probs[index][j] = (prob > thresh) ? prob : 0;
                    }
                } else {
                    for(j = l.classes - 1; j >= 0; --j){
                        if(!found && predictions[class_index + j] > .5){
                            found = 1;
                        } else {
@@ -357,12 +363,9 @@
                        }
                        float prob = predictions[class_index+j];
                        probs[index][j] = (scale > thresh) ? prob : 0;
                    }else{
                        float prob = scale*predictions[class_index+j];
                        probs[index][j] = (prob > thresh) ? prob : 0;
                    }
                }
            }else{
            } else {
                for(j = 0; j < l.classes; ++j){
                    float prob = scale*predictions[class_index+j];
                    probs[index][j] = (prob > thresh) ? prob : 0;

 src/region_layer.h

@@ -9,7 +9,7 @@
region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
void forward_region_layer(const region_layer l, network_state state);
void backward_region_layer(const region_layer l, network_state state);
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map);
void resize_region_layer(layer *l, int w, int h);

#ifdef GPU

New file
			@@ -0,0 +1,134 @@
			[net]
			batch=64
			subdivisions=8
			width=416
			height=416
			channels=3
			momentum=0.9
			decay=0.0005
			angle=0
			saturation = 1.5
			exposure = 1.5
			hue=.1

			learning_rate=0.001
			max_batches = 120000
			policy=steps
			steps=-1,100,80000,100000
			scales=.1,10,.1,.1

			[convolutional]
			batch_normalize=1
			filters=16
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=32
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=1

			[convolutional]
			batch_normalize=1
			filters=1024
			size=3
			stride=1
			pad=1
			activation=leaky

			###########

			[convolutional]
			batch_normalize=1
			size=3
			stride=1
			pad=1
			filters=1024
			activation=leaky

			[convolutional]
			size=1
			stride=1
			pad=1
			filters=425
			activation=linear

			[region]
			anchors = 0.738768,0.874946, 2.42204,2.65704, 4.30971,7.04493, 10.246,4.59428, 12.6868,11.8741
			bias_match=1
			classes=80
			coords=4
			num=5
			softmax=1
			jitter=.2
			rescore=1

			object_scale=5
			noobject_scale=1
			class_scale=1
			coord_scale=1

			absolute=1
			thresh = .6
			random=1

			@@ -63,7 +63,7 @@
			if(l.type == DETECTION){
			get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
			} else if (l.type == REGION){
			get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
			get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0, 0);
			} else {
			error("Last layer must produce detections\n");
			}

			@@ -66,7 +66,7 @@
			args.num_boxes = l.max_boxes;
			args.d = &buffer;
			args.type = DETECTION_DATA;
			args.threads = 4;
			args.threads = 8;

			args.angle = net.angle;
			args.exposure = net.exposure;
			@@ -81,6 +81,7 @@
			if(l.random && count++%10 == 0){
			printf("Resizing\n");
			int dim = (rand() % 10 + 10) * 32;
			if (get_current_batch(net)+100 > net.max_batches) dim = 544;
			//int dim = (rand() % 4 + 16) * 32;
			printf("%d\n", dim);
			args.w = dim;
			@@ -208,7 +209,7 @@
			}
			}

			void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int total, int classes, int w, int h, int *map)
			void print_imagenet_detections(FILE *fp, int id, box *boxes, float **probs, int total, int classes, int w, int h)
			{
			int i, j;
			for(i = 0; i < total; ++i){
			@@ -224,7 +225,6 @@

			for(j = 0; j < classes; ++j){
			int class = j;
			if (map) class = map[j];
			if (probs[i][class]) fprintf(fp, "%d %d %f %f %f %f %f\n", id, j+1, probs[i][class],
			xmin, ymin, xmax, ymax);
			}
			@@ -233,6 +233,7 @@

			void validate_detector(char *datacfg, char *cfgfile, char *weightfile)
			{
			int j;
			list *options = read_data_cfg(datacfg);
			char *valid_images = option_find_str(options, "valid", "data/train.list");
			char *name_list = option_find_str(options, "names", "data/names.list");
			@@ -242,23 +243,6 @@
			int *map = 0;
			if (mapf) map = read_map(mapf);


			char buff[1024];
			char *type = option_find_str(options, "eval", "voc");
			FILE *fp = 0;
			int coco = 0;
			int imagenet = 0;
			if(0==strcmp(type, "coco")){
			snprintf(buff, 1024, "%s/coco_results.json", prefix);
			fp = fopen(buff, "w");
			fprintf(fp, "[\n");
			coco = 1;
			} else if(0==strcmp(type, "imagenet")){
			snprintf(buff, 1024, "%s/imagenet-detection.txt", prefix);
			fp = fopen(buff, "w");
			imagenet = 1;
			}

			network net = parse_network_cfg(cfgfile);
			if(weightfile){
			load_weights(&net, weightfile);
			@@ -274,12 +258,31 @@
			layer l = net.layers[net.n-1];
			int classes = l.classes;

			int j;
			FILE **fps = calloc(classes, sizeof(FILE *));
			for(j = 0; j < classes; ++j){
			snprintf(buff, 1024, "%s/%s%s.txt", prefix, base, names[j]);
			fps[j] = fopen(buff, "w");
			char buff[1024];
			char *type = option_find_str(options, "eval", "voc");
			FILE *fp = 0;
			FILE **fps = 0;
			int coco = 0;
			int imagenet = 0;
			if(0==strcmp(type, "coco")){
			snprintf(buff, 1024, "%s/coco_results.json", prefix);
			fp = fopen(buff, "w");
			fprintf(fp, "[\n");
			coco = 1;
			} else if(0==strcmp(type, "imagenet")){
			snprintf(buff, 1024, "%s/imagenet-detection.txt", prefix);
			fp = fopen(buff, "w");
			imagenet = 1;
			classes = 200;
			} else {
			fps = calloc(classes, sizeof(FILE *));
			for(j = 0; j < classes; ++j){
			snprintf(buff, 1024, "%s/%s%s.txt", prefix, base, names[j]);
			fps[j] = fopen(buff, "w");
			}
			}


			box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
			float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
			for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
			@@ -330,12 +333,12 @@
			network_predict(net, X);
			int w = val[t].w;
			int h = val[t].h;
			get_region_boxes(l, w, h, thresh, probs, boxes, 0);
			get_region_boxes(l, w, h, thresh, probs, boxes, 0, map);
			if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
			if (coco){
			print_cocos(fp, path, boxes, probs, l.w*l.h*l.n, classes, w, h);
			} else if (imagenet){
			print_imagenet_detections(fp, i+t-nthreads+1 + 9741, boxes, probs, l.w*l.h*l.n, 200, w, h, map);
			print_imagenet_detections(fp, i+t-nthreads+1, boxes, probs, l.w*l.h*l.n, classes, w, h);
			} else {
			print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h);
			}
			@@ -345,7 +348,7 @@
			}
			}
			for(j = 0; j < classes; ++j){
			fclose(fps[j]);
			if(fps) fclose(fps[j]);
			}
			if(coco){
			fseek(fp, -2, SEEK_CUR);
			@@ -394,7 +397,7 @@
			image sized = resize_image(orig, net.w, net.h);
			char *id = basecfg(path);
			network_predict(net, sized.data);
			get_region_boxes(l, 1, 1, thresh, probs, boxes, 1);
			get_region_boxes(l, 1, 1, thresh, probs, boxes, 1, 0);
			if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);

			char labelpath[4096];
			@@ -473,7 +476,7 @@
			time=clock();
			network_predict(net, X);
			printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
			get_region_boxes(l, 1, 1, thresh, probs, boxes, 0);
			get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0);
			if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
			draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
			save_image(im, "predictions");

			@@ -196,7 +196,8 @@
			if(truth.x > 100000 && truth.y > 100000){
			for(n = 0; n < l.n*l.w*l.h; ++n){
			int index = size*n + b*l.outputs + 5;
			float p = get_hierarchy_probability(l.output + index, l.softmax_tree, class);
			float scale = l.output[index-1];
			float p = scale*get_hierarchy_probability(l.output + index, l.softmax_tree, class);
			if(p > maxp){
			maxp = p;
			maxi = n;
			@@ -324,7 +325,7 @@
			axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
			}

			void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
			void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map)
			{
			int i,j,n;
			float *predictions = l.output;
			@@ -348,8 +349,13 @@

			hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0);
			int found = 0;
			for(j = l.classes - 1; j >= 0; --j){
			if(1){
			if(map){
			for(j = 0; j < 200; ++j){
			float prob = scale*predictions[class_index+map[j]];
			probs[index][j] = (prob > thresh) ? prob : 0;
			}
			} else {
			for(j = l.classes - 1; j >= 0; --j){
			if(!found && predictions[class_index + j] > .5){
			found = 1;
			} else {
			@@ -357,12 +363,9 @@
			}
			float prob = predictions[class_index+j];
			probs[index][j] = (scale > thresh) ? prob : 0;
			}else{
			float prob = scale*predictions[class_index+j];
			probs[index][j] = (prob > thresh) ? prob : 0;
			}
			}
			}else{
			} else {
			for(j = 0; j < l.classes; ++j){
			float prob = scale*predictions[class_index+j];
			probs[index][j] = (prob > thresh) ? prob : 0;

			@@ -9,7 +9,7 @@
			region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
			void forward_region_layer(const region_layer l, network_state state);
			void backward_region_layer(const region_layer l, network_state state);
			void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
			void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map);
			void resize_region_layer(layer *l, int w, int h);

			#ifdef GPU