~speedprog/mtg/mtg_card_detector.git

parent: a6b2511a | patch | commit | ignore whitespace

Joseph Redmon

2016-07-19 9361292c429c0ba3400c31c7fa5d5e3d3cb6ab47

updates

99 files modified

	Makefile	8 ●●●●● patch \| view \| raw \| blame \| history
	data/labels/aeroplane.png	patch \| view \| raw \| blame \| history
	data/labels/airplane.png	patch \| view \| raw \| blame \| history
	data/labels/apple.png	patch \| view \| raw \| blame \| history
	data/labels/backpack.png	patch \| view \| raw \| blame \| history
	data/labels/banana.png	patch \| view \| raw \| blame \| history
	data/labels/baseball bat.png	patch \| view \| raw \| blame \| history
	data/labels/baseball glove.png	patch \| view \| raw \| blame \| history
	data/labels/bear.png	patch \| view \| raw \| blame \| history
	data/labels/bed.png	patch \| view \| raw \| blame \| history
	data/labels/bench.png	patch \| view \| raw \| blame \| history
	data/labels/bicycle.png	patch \| view \| raw \| blame \| history
	data/labels/bird.png	patch \| view \| raw \| blame \| history
	data/labels/boat.png	patch \| view \| raw \| blame \| history
	data/labels/book.png	patch \| view \| raw \| blame \| history
	data/labels/bottle.png	patch \| view \| raw \| blame \| history
	data/labels/bowl.png	patch \| view \| raw \| blame \| history
	data/labels/broccoli.png	patch \| view \| raw \| blame \| history
	data/labels/bus.png	patch \| view \| raw \| blame \| history
	data/labels/cake.png	patch \| view \| raw \| blame \| history
	data/labels/car.png	patch \| view \| raw \| blame \| history
	data/labels/carrot.png	patch \| view \| raw \| blame \| history
	data/labels/cat.png	patch \| view \| raw \| blame \| history
	data/labels/cell phone.png	patch \| view \| raw \| blame \| history
	data/labels/chair.png	patch \| view \| raw \| blame \| history
	data/labels/clock.png	patch \| view \| raw \| blame \| history
	data/labels/couch.png	patch \| view \| raw \| blame \| history
	data/labels/cow.png	patch \| view \| raw \| blame \| history
	data/labels/cup.png	patch \| view \| raw \| blame \| history
	data/labels/dining table.png	patch \| view \| raw \| blame \| history
	data/labels/diningtable.png	patch \| view \| raw \| blame \| history
	data/labels/dog.png	patch \| view \| raw \| blame \| history
	data/labels/donut.png	patch \| view \| raw \| blame \| history
	data/labels/elephant.png	patch \| view \| raw \| blame \| history
	data/labels/fire hydrant.png	patch \| view \| raw \| blame \| history
	data/labels/fork.png	patch \| view \| raw \| blame \| history
	data/labels/frisbee.png	patch \| view \| raw \| blame \| history
	data/labels/giraffe.png	patch \| view \| raw \| blame \| history
	data/labels/hair drier.png	patch \| view \| raw \| blame \| history
	data/labels/handbag.png	patch \| view \| raw \| blame \| history
	data/labels/horse.png	patch \| view \| raw \| blame \| history
	data/labels/hot dog.png	patch \| view \| raw \| blame \| history
	data/labels/keyboard.png	patch \| view \| raw \| blame \| history
	data/labels/kite.png	patch \| view \| raw \| blame \| history
	data/labels/knife.png	patch \| view \| raw \| blame \| history
	data/labels/laptop.png	patch \| view \| raw \| blame \| history
	data/labels/microwave.png	patch \| view \| raw \| blame \| history
	data/labels/motorbike.png	patch \| view \| raw \| blame \| history
	data/labels/motorcycle.png	patch \| view \| raw \| blame \| history
	data/labels/mouse.png	patch \| view \| raw \| blame \| history
	data/labels/orange.png	patch \| view \| raw \| blame \| history
	data/labels/oven.png	patch \| view \| raw \| blame \| history
	data/labels/parking meter.png	patch \| view \| raw \| blame \| history
	data/labels/person.png	patch \| view \| raw \| blame \| history
	data/labels/pizza.png	patch \| view \| raw \| blame \| history
	data/labels/potted plant.png	patch \| view \| raw \| blame \| history
	data/labels/pottedplant.png	patch \| view \| raw \| blame \| history
	data/labels/refrigerator.png	patch \| view \| raw \| blame \| history
	data/labels/remote.png	patch \| view \| raw \| blame \| history
	data/labels/sandwich.png	patch \| view \| raw \| blame \| history
	data/labels/scissors.png	patch \| view \| raw \| blame \| history
	data/labels/sheep.png	patch \| view \| raw \| blame \| history
	data/labels/sink.png	patch \| view \| raw \| blame \| history
	data/labels/skateboard.png	patch \| view \| raw \| blame \| history
	data/labels/skis.png	patch \| view \| raw \| blame \| history
	data/labels/snowboard.png	patch \| view \| raw \| blame \| history
	data/labels/sofa.png	patch \| view \| raw \| blame \| history
	data/labels/spoon.png	patch \| view \| raw \| blame \| history
	data/labels/sports ball.png	patch \| view \| raw \| blame \| history
	data/labels/stop sign.png	patch \| view \| raw \| blame \| history
	data/labels/suitcase.png	patch \| view \| raw \| blame \| history
	data/labels/surfboard.png	patch \| view \| raw \| blame \| history
	data/labels/teddy bear.png	patch \| view \| raw \| blame \| history
	data/labels/tennis racket.png	patch \| view \| raw \| blame \| history
	data/labels/tie.png	patch \| view \| raw \| blame \| history
	data/labels/toaster.png	patch \| view \| raw \| blame \| history
	data/labels/toilet.png	patch \| view \| raw \| blame \| history
	data/labels/toothbrush.png	patch \| view \| raw \| blame \| history
	data/labels/traffic light.png	patch \| view \| raw \| blame \| history
	data/labels/train.png	patch \| view \| raw \| blame \| history
	data/labels/truck.png	patch \| view \| raw \| blame \| history
	data/labels/tv.png	patch \| view \| raw \| blame \| history
	data/labels/tvmonitor.png	patch \| view \| raw \| blame \| history
	data/labels/umbrella.png	patch \| view \| raw \| blame \| history
	data/labels/vase.png	patch \| view \| raw \| blame \| history
	data/labels/wine glass.png	patch \| view \| raw \| blame \| history
	data/labels/zebra.png	patch \| view \| raw \| blame \| history
	src/coco.c	3 ●●●●● patch \| view \| raw \| blame \| history
	src/connected_layer.c	4 ●●●●● patch \| view \| raw \| blame \| history
	src/convolutional_layer.c	5 ●●●●● patch \| view \| raw \| blame \| history
	src/darknet.c	73 ●●●●● patch \| view \| raw \| blame \| history
	src/data.c	10 ●●●●● patch \| view \| raw \| blame \| history
	src/demo.c	2 ●●●●● patch \| view \| raw \| blame \| history
	src/detection_layer.h	4 ●●●●● patch \| view \| raw \| blame \| history
	src/image.c	7 ●●●●● patch \| view \| raw \| blame \| history
	src/layer.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/network.c	13 ●●●●● patch \| view \| raw \| blame \| history
	src/network_kernels.cu	7 ●●●●● patch \| view \| raw \| blame \| history
	src/parser.c	28 ●●●●● patch \| view \| raw \| blame \| history

 Makefile

@@ -1,6 +1,6 @@
GPU=0
CUDNN=0
OPENCV=0
GPU=1
CUDNN=1
OPENCV=1
DEBUG=0

ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
@@ -41,7 +41,7 @@
LDFLAGS+= -lcudnn
endif

OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o
ifeq ($(GPU), 1) 
LDFLAGS+= -lstdc++ 
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o

 data/labels/aeroplane.png



 data/labels/airplane.png



 data/labels/apple.png



 data/labels/backpack.png



 data/labels/banana.png



 data/labels/baseball bat.png



 data/labels/baseball glove.png



 data/labels/bear.png



 data/labels/bed.png



 data/labels/bench.png



 data/labels/bicycle.png



 data/labels/bird.png



 data/labels/boat.png



 data/labels/book.png



 data/labels/bottle.png



 data/labels/bowl.png



 data/labels/broccoli.png



 data/labels/bus.png



 data/labels/cake.png



 data/labels/car.png



 data/labels/carrot.png



 data/labels/cat.png



 data/labels/cell phone.png



 data/labels/chair.png



 data/labels/clock.png



 data/labels/couch.png



 data/labels/cow.png



 data/labels/cup.png



 data/labels/dining table.png



 data/labels/diningtable.png



 data/labels/dog.png



 data/labels/donut.png



 data/labels/elephant.png



 data/labels/fire hydrant.png



 data/labels/fork.png



 data/labels/frisbee.png



 data/labels/giraffe.png



 data/labels/hair drier.png



 data/labels/handbag.png



 data/labels/horse.png



 data/labels/hot dog.png



 data/labels/keyboard.png



 data/labels/kite.png



 data/labels/knife.png



 data/labels/laptop.png



 data/labels/microwave.png



 data/labels/motorbike.png



 data/labels/motorcycle.png



 data/labels/mouse.png



 data/labels/orange.png



 data/labels/oven.png



 data/labels/parking meter.png



 data/labels/person.png



 data/labels/pizza.png



 data/labels/potted plant.png



 data/labels/pottedplant.png



 data/labels/refrigerator.png



 data/labels/remote.png



 data/labels/sandwich.png



 data/labels/scissors.png



 data/labels/sheep.png



 data/labels/sink.png



 data/labels/skateboard.png



 data/labels/skis.png



 data/labels/snowboard.png



 data/labels/sofa.png



 data/labels/spoon.png



 data/labels/sports ball.png



 data/labels/stop sign.png



 data/labels/suitcase.png



 data/labels/surfboard.png



 data/labels/teddy bear.png



 data/labels/tennis racket.png



 data/labels/tie.png



 data/labels/toaster.png



 data/labels/toilet.png



 data/labels/toothbrush.png



 data/labels/traffic light.png



 data/labels/train.png



 data/labels/truck.png



 data/labels/tv.png



 data/labels/tvmonitor.png



 data/labels/umbrella.png



 data/labels/vase.png



 data/labels/wine glass.png



 data/labels/zebra.png



 src/coco.c

@@ -348,9 +348,8 @@
        convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
        if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
        draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, coco_labels, 80);
        save_image(im, "prediction");
        show_image(im, "predictions");

        show_image(sized, "resized");
        free_image(im);
        free_image(sized);
#ifdef OPENCV

 src/connected_layer.c

@@ -192,6 +192,9 @@
            l.weights[i*l.inputs + j] *= scale;
        }
        l.biases[i] -= l.rolling_mean[i] * scale;
        l.scales[i] = 1;
        l.rolling_mean[i] = 0;
        l.rolling_variance[i] = 1;
    }
}

@@ -257,7 +260,6 @@
        axpy_ongpu(l.outputs, 1, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1);
    }
    activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);

}

void backward_connected_layer_gpu(connected_layer l, network_state state)

 src/convolutional_layer.c

@@ -301,6 +301,9 @@
            l.filters[i*l.c*l.size*l.size + j] *= scale;
        }
        l.biases[i] -= l.rolling_mean[i] * scale;
        l.scales[i] = 1;
        l.rolling_mean[i] = 0;
        l.rolling_variance[i] = 1;
    }
}

@@ -434,7 +437,7 @@
       }
     */

    if(l.xnor ){
    if(l.xnor){
        binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.binary_filters);
        swap_binary(&l);
        binarize_cpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input);

 src/darknet.c

@@ -14,6 +14,7 @@

extern void run_imagenet(int argc, char **argv);
extern void run_yolo(int argc, char **argv);
extern void run_detector(int argc, char **argv);
extern void run_coco(int argc, char **argv);
extern void run_writing(int argc, char **argv);
extern void run_captcha(int argc, char **argv);
@@ -97,12 +98,13 @@
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.type == CONVOLUTIONAL){
            ops += 2 * l.n * l.size*l.size*l.c * l.out_h*l.out_w;
            ops += 2l * l.n * l.size*l.size*l.c * l.out_h*l.out_w;
        } else if(l.type == CONNECTED){
            ops += 2 * l.inputs * l.outputs;
            ops += 2l * l.inputs * l.outputs;
        }
    }
    printf("Floating Point Operations: %ld\n", ops);
    printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.);
}

void partial(char *cfgfile, char *weightfile, char *outfile, int max)
@@ -164,6 +166,47 @@
    save_weights(net, outfile);
}

void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network net = parse_network_cfg(cfgfile);
    if (weightfile) {
        load_weights(&net, weightfile);
    }
    int i;
    for (i = 0; i < net.n; ++i) {
        layer l = net.layers[i];
        if (l.type == CONVOLUTIONAL && l.batch_normalize) {
            denormalize_convolutional_layer(l);
        }
        if (l.type == CONNECTED && l.batch_normalize) {
            denormalize_connected_layer(l);
        }
        if (l.type == GRU && l.batch_normalize) {
            denormalize_connected_layer(*l.input_z_layer);
            denormalize_connected_layer(*l.input_r_layer);
            denormalize_connected_layer(*l.input_h_layer);
            denormalize_connected_layer(*l.state_z_layer);
            denormalize_connected_layer(*l.state_r_layer);
            denormalize_connected_layer(*l.state_h_layer);
        }
    }
    save_weights(net, outfile);
}

/*
 * Return a copy of l with batch normalization switched on.
 *
 * Three fresh n-element float arrays are attached to the copy: scales
 * (every entry set to 1), rolling_mean and rolling_variance (both left
 * zero-filled by calloc). The allocations are not checked for failure.
 */
layer normalize_layer(layer l, int n)
{
    int k = 0;

    l.batch_normalize = 1;

    l.scales = calloc(n, sizeof(float));
    while (k < n) {
        l.scales[k] = 1;
        ++k;
    }

    l.rolling_mean = calloc(n, sizeof(float));
    l.rolling_variance = calloc(n, sizeof(float));

    return l;
}

void normalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
@@ -171,17 +214,23 @@
    if(weightfile){
        load_weights(&net, weightfile);
    }
    int i, j;
    int i;
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.type == CONVOLUTIONAL){
        if(l.type == CONVOLUTIONAL && !l.batch_normalize){
            net.layers[i] = normalize_layer(l, l.n);
        }
        if (l.type == CONNECTED && !l.batch_normalize) {
            net.layers[i] = normalize_layer(l, l.outputs);
        }
        if (l.type == GRU && l.batch_normalize) {
            *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs);
            *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs);
            *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs);
            *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs);
            *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs);
            *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs);
            net.layers[i].batch_normalize=1;
            net.layers[i].scales = calloc(l.n, sizeof(float));
            for(j = 0; j < l.n; ++j){
                net.layers[i].scales[i] = 1;
            }
            net.layers[i].rolling_mean = calloc(l.n, sizeof(float));
            net.layers[i].rolling_variance = calloc(l.n, sizeof(float));
        }
    }
    save_weights(net, outfile);
@@ -265,6 +314,8 @@
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "yolo")){
        run_yolo(argc, argv);
    } else if (0 == strcmp(argv[1], "detector")){
        run_detector(argc, argv);
    } else if (0 == strcmp(argv[1], "cifar")){
        run_cifar(argc, argv);
    } else if (0 == strcmp(argv[1], "go")){
@@ -299,6 +350,8 @@
        change_rate(argv[2], atof(argv[3]), (argc > 4) ? atof(argv[4]) : 0);
    } else if (0 == strcmp(argv[1], "rgbgr")){
        rgbgr_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "reset")){
        reset_normalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "denormalize")){
        denormalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "normalize")){

 src/data.c

@@ -297,11 +297,11 @@

        if (w < .01 || h < .01) continue;

        truth[i*5+0] = id;
        truth[i*5+1] = x;
        truth[i*5+2] = y;
        truth[i*5+3] = w;
        truth[i*5+4] = h;
        truth[i*5+0] = x;
        truth[i*5+1] = y;
        truth[i*5+2] = w;
        truth[i*5+3] = h;
        truth[i*5+4] = id;
    }
    free(boxes);
}

 src/demo.c

@@ -8,7 +8,7 @@
#include "demo.h"
#include <sys/time.h>

#define FRAMES 3
#define FRAMES 1

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"

 src/detection_layer.h

@@ -1,5 +1,5 @@
#ifndef REGION_LAYER_H
#define REGION_LAYER_H
#ifndef DETECTION_LAYER_H
#define DETECTION_LAYER_H

#include "layer.h"
#include "network.h"

 src/image.c

@@ -109,14 +109,17 @@
        int class = max_index(probs[i], classes);
        float prob = probs[i][class];
        if(prob > thresh){
            int width = pow(prob, 1./2.)*10+1;
            width = 8;
            //int width = pow(prob, 1./2.)*30+1;
            int width = 8;
            printf("%s: %.0f%%\n", names[class], prob*100);
            int offset = class*1 % classes;
            float red = get_color(2,offset,classes);
            float green = get_color(1,offset,classes);
            float blue = get_color(0,offset,classes);
            float rgb[3];

            //width = prob*20+2;

            rgb[0] = red;
            rgb[1] = green;
            rgb[2] = blue;

 src/layer.h

@@ -29,6 +29,7 @@
    BATCHNORM,
    NETWORK,
    XNOR,
    REGION,
    BLANK
} LAYER_TYPE;


 src/network.c

@@ -16,6 +16,7 @@
#include "activation_layer.h"
#include "deconvolutional_layer.h"
#include "detection_layer.h"
#include "region_layer.h"
#include "normalization_layer.h"
#include "batchnorm_layer.h"
#include "maxpool_layer.h"
@@ -103,6 +104,8 @@
            return "softmax";
        case DETECTION:
            return "detection";
        case REGION:
            return "region";
        case DROPOUT:
            return "dropout";
        case CROP:
@@ -160,6 +163,8 @@
            forward_batchnorm_layer(l, state);
        } else if(l.type == DETECTION){
            forward_detection_layer(l, state);
        } else if(l.type == REGION){
            forward_region_layer(l, state);
        } else if(l.type == CONNECTED){
            forward_connected_layer(l, state);
        } else if(l.type == RNN){
@@ -230,11 +235,7 @@
    float sum = 0;
    int count = 0;
    for(i = 0; i < net.n; ++i){
        if(net.layers[i].type == COST){
            sum += net.layers[i].cost[0];
            ++count;
        }
        if(net.layers[i].type == DETECTION){
        if(net.layers[i].cost){
            sum += net.layers[i].cost[0];
            ++count;
        }
@@ -284,6 +285,8 @@
            backward_dropout_layer(l, state);
        } else if(l.type == DETECTION){
            backward_detection_layer(l, state);
        } else if(l.type == REGION){
            backward_region_layer(l, state);
        } else if(l.type == SOFTMAX){
            if(i != 0) backward_softmax_layer(l, state);
        } else if(l.type == CONNECTED){

 src/network_kernels.cu

@@ -19,6 +19,7 @@
#include "gru_layer.h"
#include "crnn_layer.h"
#include "detection_layer.h"
#include "region_layer.h"
#include "convolutional_layer.h"
#include "activation_layer.h"
#include "deconvolutional_layer.h"
@@ -59,6 +60,8 @@
            forward_local_layer_gpu(l, state);
        } else if(l.type == DETECTION){
            forward_detection_layer_gpu(l, state);
        } else if(l.type == REGION){
            forward_region_layer_gpu(l, state);
        } else if(l.type == CONNECTED){
            forward_connected_layer_gpu(l, state);
        } else if(l.type == RNN){
@@ -125,6 +128,8 @@
            backward_dropout_layer_gpu(l, state);
        } else if(l.type == DETECTION){
            backward_detection_layer_gpu(l, state);
        } else if(l.type == REGION){
            backward_region_layer_gpu(l, state);
        } else if(l.type == NORMALIZATION){
            backward_normalization_layer_gpu(l, state);
        } else if(l.type == BATCHNORM){
@@ -181,7 +186,7 @@
    state.net = net;
    int x_size = get_network_input_size(net)*net.batch;
    int y_size = get_network_output_size(net)*net.batch;
    if(net.layers[net.n-1].type == DETECTION) y_size = net.layers[net.n-1].truths*net.batch;
    if(net.layers[net.n-1].truths) y_size = net.layers[net.n-1].truths*net.batch;
    if(!*net.input_gpu){
        *net.input_gpu = cuda_make_array(x, x_size);
        *net.truth_gpu = cuda_make_array(y, y_size);

 src/parser.c

@@ -19,6 +19,7 @@
#include "softmax_layer.h"
#include "dropout_layer.h"
#include "detection_layer.h"
#include "region_layer.h"
#include "avgpool_layer.h"
#include "local_layer.h"
#include "route_layer.h"
@@ -51,6 +52,7 @@
int is_shortcut(section *s);
int is_cost(section *s);
int is_detection(section *s);
int is_region(section *s);
int is_route(section *s);
list *read_cfg(char *filename);

@@ -245,6 +247,25 @@
    return layer;
}

/*
 * Build a region layer from a [region] config section.
 *
 * Reads "coords" (default 4), "classes" (default 20) and "num" (default 1),
 * constructs the layer with make_region_layer using the batch/w/h from
 * params, and asserts that the layer's output count equals params.inputs.
 * The remaining options (softmax, max, jitter, rescore and the four loss
 * scales) are then copied onto the layer with the defaults shown below.
 *
 * The option lookups are kept in their original order.
 */
layer parse_region(list *options, size_params params)
{
    const int n_coords = option_find_int(options, "coords", 4);
    const int n_classes = option_find_int(options, "classes", 20);
    const int n_boxes = option_find_int(options, "num", 1);

    layer l = make_region_layer(params.batch, params.w, params.h,
                                n_boxes, n_classes, n_coords);
    assert(l.outputs == params.inputs);

    /* Behaviour switches and augmentation settings. */
    l.softmax   = option_find_int(options, "softmax", 0);
    l.max_boxes = option_find_int_quiet(options, "max",30);
    l.jitter    = option_find_float(options, "jitter", .2);
    l.rescore   = option_find_int_quiet(options, "rescore",0);

    /* Per-term scale factors, each defaulting to 1. */
    l.coord_scale    = option_find_float(options, "coord_scale", 1);
    l.object_scale   = option_find_float(options, "object_scale", 1);
    l.noobject_scale = option_find_float(options, "noobject_scale", 1);
    l.class_scale    = option_find_float(options, "class_scale", 1);

    return l;
}
detection_layer parse_detection(list *options, size_params params)
{
    int coords = option_find_int(options, "coords", 1);
@@ -557,6 +578,8 @@
            l = parse_crop(options, params);
        }else if(is_cost(s)){
            l = parse_cost(options, params);
        }else if(is_region(s)){
            l = parse_region(options, params);
        }else if(is_detection(s)){
            l = parse_detection(options, params);
        }else if(is_softmax(s)){
@@ -620,6 +643,7 @@
    if (strcmp(type, "[crop]")==0) return CROP;
    if (strcmp(type, "[cost]")==0) return COST;
    if (strcmp(type, "[detection]")==0) return DETECTION;
    if (strcmp(type, "[region]")==0) return REGION;
    if (strcmp(type, "[local]")==0) return LOCAL;
    if (strcmp(type, "[deconv]")==0
            || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL;
@@ -659,6 +683,10 @@
{
    return (strcmp(s->type, "[cost]")==0);
}
/* Returns 1 when the section's type string is exactly "[region]", else 0. */
int is_region(section *s)
{
    if (strcmp(s->type, "[region]") != 0) return 0;
    return 1;
}
int is_detection(section *s)
{
    return (strcmp(s->type, "[detection]")==0);

			@@ -1,6 +1,6 @@
			GPU=0
			CUDNN=0
			OPENCV=0
			GPU=1
			CUDNN=1
			OPENCV=1
			DEBUG=0

			ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
			@@ -41,7 +41,7 @@
			LDFLAGS+= -lcudnn
			endif

			OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o
			OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o
			ifeq ($(GPU), 1)
			LDFLAGS+= -lstdc++
			OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o

			@@ -348,9 +348,8 @@
			convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
			if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
			draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, coco_labels, 80);
			save_image(im, "prediction");
			show_image(im, "predictions");

			show_image(sized, "resized");
			free_image(im);
			free_image(sized);
			#ifdef OPENCV

			@@ -192,6 +192,9 @@
			l.weights[i*l.inputs + j] *= scale;
			}
			l.biases[i] -= l.rolling_mean[i] * scale;
			l.scales[i] = 1;
			l.rolling_mean[i] = 0;
			l.rolling_variance[i] = 1;
			}
			}

			@@ -257,7 +260,6 @@
			axpy_ongpu(l.outputs, 1, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1);
			}
			activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);

			}

			void backward_connected_layer_gpu(connected_layer l, network_state state)

			@@ -301,6 +301,9 @@
			l.filters[i*l.c*l.size*l.size + j] *= scale;
			}
			l.biases[i] -= l.rolling_mean[i] * scale;
			l.scales[i] = 1;
			l.rolling_mean[i] = 0;
			l.rolling_variance[i] = 1;
			}
			}

			@@ -434,7 +437,7 @@
			}
			*/

			if(l.xnor ){
			if(l.xnor){
			binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.binary_filters);
			swap_binary(&l);
			binarize_cpu(state.input, l.c*l.h*l.w*l.batch, l.binary_input);

			@@ -14,6 +14,7 @@

			extern void run_imagenet(int argc, char **argv);
			extern void run_yolo(int argc, char **argv);
			extern void run_detector(int argc, char **argv);
			extern void run_coco(int argc, char **argv);
			extern void run_writing(int argc, char **argv);
			extern void run_captcha(int argc, char **argv);
			@@ -97,12 +98,13 @@
			for(i = 0; i < net.n; ++i){
			layer l = net.layers[i];
			if(l.type == CONVOLUTIONAL){
			ops += 2 * l.n * l.size*l.size*l.c * l.out_h*l.out_w;
			ops += 2l * l.n * l.size*l.size*l.c * l.out_h*l.out_w;
			} else if(l.type == CONNECTED){
			ops += 2 * l.inputs * l.outputs;
			ops += 2l * l.inputs * l.outputs;
			}
			}
			printf("Floating Point Operations: %ld\n", ops);
			printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.);
			}

			void partial(char *cfgfile, char *weightfile, char *outfile, int max)
			@@ -164,6 +166,47 @@
			save_weights(net, outfile);
			}

			void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
			{
			gpu_index = -1;
			network net = parse_network_cfg(cfgfile);
			if (weightfile) {
			load_weights(&net, weightfile);
			}
			int i;
			for (i = 0; i < net.n; ++i) {
			layer l = net.layers[i];
			if (l.type == CONVOLUTIONAL && l.batch_normalize) {
			denormalize_convolutional_layer(l);
			}
			if (l.type == CONNECTED && l.batch_normalize) {
			denormalize_connected_layer(l);
			}
			if (l.type == GRU && l.batch_normalize) {
			denormalize_connected_layer(*l.input_z_layer);
			denormalize_connected_layer(*l.input_r_layer);
			denormalize_connected_layer(*l.input_h_layer);
			denormalize_connected_layer(*l.state_z_layer);
			denormalize_connected_layer(*l.state_r_layer);
			denormalize_connected_layer(*l.state_h_layer);
			}
			}
			save_weights(net, outfile);
			}

			layer normalize_layer(layer l, int n)
			{
			int j;
			l.batch_normalize=1;
			l.scales = calloc(n, sizeof(float));
			for(j = 0; j < n; ++j){
			l.scales[j] = 1;
			}
			l.rolling_mean = calloc(n, sizeof(float));
			l.rolling_variance = calloc(n, sizeof(float));
			return l;
			}

			void normalize_net(char *cfgfile, char *weightfile, char *outfile)
			{
			gpu_index = -1;
			@@ -171,17 +214,23 @@
			if(weightfile){
			load_weights(&net, weightfile);
			}
			int i, j;
			int i;
			for(i = 0; i < net.n; ++i){
			layer l = net.layers[i];
			if(l.type == CONVOLUTIONAL){
			if(l.type == CONVOLUTIONAL && !l.batch_normalize){
			net.layers[i] = normalize_layer(l, l.n);
			}
			if (l.type == CONNECTED && !l.batch_normalize) {
			net.layers[i] = normalize_layer(l, l.outputs);
			}
			if (l.type == GRU && l.batch_normalize) {
			*l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs);
			*l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs);
			*l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs);
			*l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs);
			*l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs);
			*l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs);
			net.layers[i].batch_normalize=1;
			net.layers[i].scales = calloc(l.n, sizeof(float));
			for(j = 0; j < l.n; ++j){
			net.layers[i].scales[i] = 1;
			}
			net.layers[i].rolling_mean = calloc(l.n, sizeof(float));
			net.layers[i].rolling_variance = calloc(l.n, sizeof(float));
			}
			}
			save_weights(net, outfile);
			@@ -265,6 +314,8 @@
			average(argc, argv);
			} else if (0 == strcmp(argv[1], "yolo")){
			run_yolo(argc, argv);
			} else if (0 == strcmp(argv[1], "detector")){
			run_detector(argc, argv);
			} else if (0 == strcmp(argv[1], "cifar")){
			run_cifar(argc, argv);
			} else if (0 == strcmp(argv[1], "go")){
			@@ -299,6 +350,8 @@
			change_rate(argv[2], atof(argv[3]), (argc > 4) ? atof(argv[4]) : 0);
			} else if (0 == strcmp(argv[1], "rgbgr")){
			rgbgr_net(argv[2], argv[3], argv[4]);
			} else if (0 == strcmp(argv[1], "reset")){
			reset_normalize_net(argv[2], argv[3], argv[4]);
			} else if (0 == strcmp(argv[1], "denormalize")){
			denormalize_net(argv[2], argv[3], argv[4]);
			} else if (0 == strcmp(argv[1], "normalize")){

			@@ -297,11 +297,11 @@

			if (w < .01 || h < .01) continue;

			truth[i*5+0] = id;
			truth[i*5+1] = x;
			truth[i*5+2] = y;
			truth[i*5+3] = w;
			truth[i*5+4] = h;
			truth[i*5+0] = x;
			truth[i*5+1] = y;
			truth[i*5+2] = w;
			truth[i*5+3] = h;
			truth[i*5+4] = id;
			}
			free(boxes);
			}

			@@ -8,7 +8,7 @@
			#include "demo.h"
			#include <sys/time.h>

			#define FRAMES 3
			#define FRAMES 1

			#ifdef OPENCV
			#include "opencv2/highgui/highgui_c.h"

			@@ -1,5 +1,5 @@
			#ifndef REGION_LAYER_H
			#define REGION_LAYER_H
			#ifndef DETECTION_LAYER_H
			#define DETECTION_LAYER_H

			#include "layer.h"
			#include "network.h"

			@@ -109,14 +109,17 @@
			int class = max_index(probs[i], classes);
			float prob = probs[i][class];
			if(prob > thresh){
			int width = pow(prob, 1./2.)*10+1;
			width = 8;
			//int width = pow(prob, 1./2.)*30+1;
			int width = 8;
			printf("%s: %.0f%%\n", names[class], prob*100);
			int offset = class*1 % classes;
			float red = get_color(2,offset,classes);
			float green = get_color(1,offset,classes);
			float blue = get_color(0,offset,classes);
			float rgb[3];

			//width = prob*20+2;

			rgb[0] = red;
			rgb[1] = green;
			rgb[2] = blue;

			@@ -29,6 +29,7 @@
			BATCHNORM,
			NETWORK,
			XNOR,
			REGION,
			BLANK
			} LAYER_TYPE;

			@@ -16,6 +16,7 @@
			#include "activation_layer.h"
			#include "deconvolutional_layer.h"
			#include "detection_layer.h"
			#include "region_layer.h"
			#include "normalization_layer.h"
			#include "batchnorm_layer.h"
			#include "maxpool_layer.h"
			@@ -103,6 +104,8 @@
			return "softmax";
			case DETECTION:
			return "detection";
			case REGION:
			return "region";
			case DROPOUT:
			return "dropout";
			case CROP:
			@@ -160,6 +163,8 @@
			forward_batchnorm_layer(l, state);
			} else if(l.type == DETECTION){
			forward_detection_layer(l, state);
			} else if(l.type == REGION){
			forward_region_layer(l, state);
			} else if(l.type == CONNECTED){
			forward_connected_layer(l, state);
			} else if(l.type == RNN){
			@@ -230,11 +235,7 @@
			float sum = 0;
			int count = 0;
			for(i = 0; i < net.n; ++i){
			if(net.layers[i].type == COST){
			sum += net.layers[i].cost[0];
			++count;
			}
			if(net.layers[i].type == DETECTION){
			if(net.layers[i].cost){
			sum += net.layers[i].cost[0];
			++count;
			}
			@@ -284,6 +285,8 @@
			backward_dropout_layer(l, state);
			} else if(l.type == DETECTION){
			backward_detection_layer(l, state);
			} else if(l.type == REGION){
			backward_region_layer(l, state);
			} else if(l.type == SOFTMAX){
			if(i != 0) backward_softmax_layer(l, state);
			} else if(l.type == CONNECTED){

			@@ -19,6 +19,7 @@
			#include "gru_layer.h"
			#include "crnn_layer.h"
			#include "detection_layer.h"
			#include "region_layer.h"
			#include "convolutional_layer.h"
			#include "activation_layer.h"
			#include "deconvolutional_layer.h"
			@@ -59,6 +60,8 @@
			forward_local_layer_gpu(l, state);
			} else if(l.type == DETECTION){
			forward_detection_layer_gpu(l, state);
			} else if(l.type == REGION){
			forward_region_layer_gpu(l, state);
			} else if(l.type == CONNECTED){
			forward_connected_layer_gpu(l, state);
			} else if(l.type == RNN){
			@@ -125,6 +128,8 @@
			backward_dropout_layer_gpu(l, state);
			} else if(l.type == DETECTION){
			backward_detection_layer_gpu(l, state);
			} else if(l.type == REGION){
			backward_region_layer_gpu(l, state);
			} else if(l.type == NORMALIZATION){
			backward_normalization_layer_gpu(l, state);
			} else if(l.type == BATCHNORM){
			@@ -181,7 +186,7 @@
			state.net = net;
			int x_size = get_network_input_size(net)*net.batch;
			int y_size = get_network_output_size(net)*net.batch;
			if(net.layers[net.n-1].type == DETECTION) y_size = net.layers[net.n-1].truths*net.batch;
			if(net.layers[net.n-1].truths) y_size = net.layers[net.n-1].truths*net.batch;
			if(!*net.input_gpu){
			*net.input_gpu = cuda_make_array(x, x_size);
			*net.truth_gpu = cuda_make_array(y, y_size);

			@@ -19,6 +19,7 @@
			#include "softmax_layer.h"
			#include "dropout_layer.h"
			#include "detection_layer.h"
			#include "region_layer.h"
			#include "avgpool_layer.h"
			#include "local_layer.h"
			#include "route_layer.h"
			@@ -51,6 +52,7 @@
			int is_shortcut(section *s);
			int is_cost(section *s);
			int is_detection(section *s);
			int is_region(section *s);
			int is_route(section *s);
			list read_cfg(char filename);

			@@ -245,6 +247,25 @@
			return layer;
			}

			/*
			 * Build a region layer from the key/value options of a cfg section.
			 *
			 * options: option list of the section being parsed.
			 * params:  size information of the preceding layer; batch, w, h and
			 *          inputs are read here.
			 *
			 * Returns the layer produced by make_region_layer() with its tuning
			 * fields filled in from the options (defaults shown at each lookup).
			 */
			layer parse_region(list *options, size_params params)
			{
			    /* Geometry of the layer: box coordinates, class count, boxes per cell. */
			    int n_coords  = option_find_int(options, "coords", 4);
			    int n_classes = option_find_int(options, "classes", 20);
			    int n_boxes   = option_find_int(options, "num", 1);

			    layer region = make_region_layer(params.batch, params.w, params.h,
			                                     n_boxes, n_classes, n_coords);

			    /* The constructed layer must have exactly as many outputs as it is fed inputs. */
			    assert(region.outputs == params.inputs);

			    /* Behavioural switches and data-augmentation settings. */
			    region.softmax   = option_find_int(options, "softmax", 0);
			    region.max_boxes = option_find_int_quiet(options, "max",30);
			    region.jitter    = option_find_float(options, "jitter", .2);
			    region.rescore   = option_find_int_quiet(options, "rescore",0);

			    /* Scale factors for the individual terms; all default to 1. */
			    region.coord_scale    = option_find_float(options, "coord_scale", 1);
			    region.object_scale   = option_find_float(options, "object_scale", 1);
			    region.noobject_scale = option_find_float(options, "noobject_scale", 1);
			    region.class_scale    = option_find_float(options, "class_scale", 1);

			    return region;
			}
			detection_layer parse_detection(list *options, size_params params)
			{
			int coords = option_find_int(options, "coords", 1);
			@@ -557,6 +578,8 @@
			l = parse_crop(options, params);
			}else if(is_cost(s)){
			l = parse_cost(options, params);
			}else if(is_region(s)){
			l = parse_region(options, params);
			}else if(is_detection(s)){
			l = parse_detection(options, params);
			}else if(is_softmax(s)){
			@@ -620,6 +643,7 @@
			if (strcmp(type, "[crop]")==0) return CROP;
			if (strcmp(type, "[cost]")==0) return COST;
			if (strcmp(type, "[detection]")==0) return DETECTION;
			if (strcmp(type, "[region]")==0) return REGION;
			if (strcmp(type, "[local]")==0) return LOCAL;
			if (strcmp(type, "[deconv]")==0
			\|\| strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL;
			@@ -659,6 +683,10 @@
			{
			return (strcmp(s->type, "[cost]")==0);
			}
			/* Returns 1 when the section's type string is exactly "[region]", 0 otherwise. */
			int is_region(section *s)
			{
			    int differs = strcmp(s->type, "[region]");
			    return !differs;
			}
			int is_detection(section *s)
			{
			return (strcmp(s->type, "[detection]")==0);