~speedprog/mtg/mtg_card_detector.git

parent: c899cc19 | patch | commit | ignore whitespace

So I have this new programming paradigm.......

Joseph Redmon

2016-09-25 481b57a96a9ef29b112caec1bb3e17ffb043ceae

So I have this new programming paradigm.......

47 files modified

2 files deleted

	Makefile	4 ●●●●● patch \| view \| raw \| blame \| history
	data/labels/make_labels.py	17 ●●●●● patch \| view \| raw \| blame \| history
	src/activation_layer.c	5 ●●●●● patch \| view \| raw \| blame \| history
	src/art.c	1 ●●●●● patch \| view \| raw \| blame \| history
	src/avgpool_layer.c	4 ●●●●● patch \| view \| raw \| blame \| history
	src/batchnorm_layer.c	6 ●●●●● patch \| view \| raw \| blame \| history
	src/classifier.c	14 ●●●●● patch \| view \| raw \| blame \| history
	src/coco.c	40 ●●●●● patch \| view \| raw \| blame \| history
	src/connected_layer.c	8 ●●●●● patch \| view \| raw \| blame \| history
	src/convolutional_layer.c	7 ●●●●● patch \| view \| raw \| blame \| history
	src/cost_layer.c	6 ●●●●● patch \| view \| raw \| blame \| history
	src/crnn_layer.c	8 ●●●●● patch \| view \| raw \| blame \| history
	src/crop_layer.c	8 ●●●●● patch \| view \| raw \| blame \| history
	src/darknet.c	13 ●●●●● patch \| view \| raw \| blame \| history
	src/data.c	60 ●●●●● patch \| view \| raw \| blame \| history
	src/deconvolutional_layer.c	4 ●●●●● patch \| view \| raw \| blame \| history
	src/demo.c	47 ●●●●● patch \| view \| raw \| blame \| history
	src/demo.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/detection_layer.c	34 ●●●●● patch \| view \| raw \| blame \| history
	src/detection_layer.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/detector.c	121 ●●●●● patch \| view \| raw \| blame \| history
	src/dropout_layer.c	4 ●●●●● patch \| view \| raw \| blame \| history
	src/gru_layer.c	8 ●●●●● patch \| view \| raw \| blame \| history
	src/gru_layer.h	23 ●●●●● patch \| view \| raw \| blame \| history
	src/image.c	190 ●●●●● patch \| view \| raw \| blame \| history
	src/image.h	13 ●●●●● patch \| view \| raw \| blame \| history
	src/layer.h	8 ●●●●● patch \| view \| raw \| blame \| history
	src/local_layer.c	8 ●●●●● patch \| view \| raw \| blame \| history
	src/maxpool_layer.c	4 ●●●●● patch \| view \| raw \| blame \| history
	src/network.c	113 ●●●●● patch \| view \| raw \| blame \| history
	src/network_kernels.cu	122 ●●●●● patch \| view \| raw \| blame \| history
	src/normalization_layer.c	6 ●●●●● patch \| view \| raw \| blame \| history
	src/parser.c	311 ●●●●● patch \| view \| raw \| blame \| history
	src/region_layer.c	43 ●●●●● patch \| view \| raw \| blame \| history
	src/region_layer.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/reorg_layer.c	6 ●●●●● patch \| view \| raw \| blame \| history
	src/rnn_layer.c	6 ●●●●● patch \| view \| raw \| blame \| history
	src/rnn_layer.h	23 ●●●●● patch \| view \| raw \| blame \| history
	src/rnn_vid.c	2 ●●●●● patch \| view \| raw \| blame \| history
	src/route_layer.c	22 ●●●●● patch \| view \| raw \| blame \| history
	src/route_layer.h	8 ●●●●● patch \| view \| raw \| blame \| history
	src/shortcut_layer.c	6 ●●●●● patch \| view \| raw \| blame \| history
	src/softmax_layer.c	6 ●●●●● patch \| view \| raw \| blame \| history
	src/utils.c	21 ●●●●● patch \| view \| raw \| blame \| history
	src/utils.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/voxel.c	1 ●●●●● patch \| view \| raw \| blame \| history
	src/xnor_layer.c	86 ●●●●● patch \| view \| raw \| blame \| history
	src/xnor_layer.h	11 ●●●●● patch \| view \| raw \| blame \| history
	src/yolo.c	79 ●●●●● patch \| view \| raw \| blame \| history

 Makefile

@@ -41,10 +41,10 @@
LDFLAGS+= -lcudnn
endif

OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
OBJ=gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
ifeq ($(GPU), 1) 
LDFLAGS+= -lstdc++ 
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
endif

OBJS = $(addprefix $(OBJDIR), $(OBJ))

 data/labels/make_labels.py

@@ -1,6 +1,19 @@
import os
import string
import pipes

l = ["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
#l = ["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

l = string.printable

for word in l:
    os.system("convert -fill black -background white -bordercolor white -border 4 -font futura-normal -pointsize 18 label:\"%s\" \"%s.png\""%(word, word))
    #os.system("convert -fill black -background white -bordercolor white -border 4 -font futura-normal -pointsize 18 label:\"%s\" \"%s.png\""%(word, word))
    if word == ' ':
        os.system('convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:"\ " 32.png')
    elif word == '\\':
        os.system('convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:"\\\\\\\\" 92.png')
    elif ord(word) in [9,10,11,12,13,14]:
        pass
    else:
        os.system("convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:%s \"%d.png\""%(pipes.quote(word), ord(word)))


 src/activation_layer.c

@@ -21,7 +21,12 @@
    l.output = calloc(batch*inputs, sizeof(float*));
    l.delta = calloc(batch*inputs, sizeof(float*));

    l.forward = forward_activation_layer;
    l.backward = backward_activation_layer;
#ifdef GPU
    l.forward_gpu = forward_activation_layer_gpu;
    l.backward_gpu = backward_activation_layer_gpu;

    l.output_gpu = cuda_make_array(l.output, inputs*batch);
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
#endif

 src/art.c

@@ -8,6 +8,7 @@

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
image get_image_from_stream(CvCapture *cap);
#endif



 src/avgpool_layer.c

@@ -19,7 +19,11 @@
    int output_size = l.outputs * batch;
    l.output =  calloc(output_size, sizeof(float));
    l.delta =   calloc(output_size, sizeof(float));
    l.forward = forward_avgpool_layer;
    l.backward = backward_avgpool_layer;
    #ifdef GPU
    l.forward_gpu = forward_avgpool_layer_gpu;
    l.backward_gpu = backward_avgpool_layer_gpu;
    l.output_gpu  = cuda_make_array(l.output, output_size);
    l.delta_gpu   = cuda_make_array(l.delta, output_size);
    #endif

 src/batchnorm_layer.c

@@ -28,7 +28,13 @@

    layer.rolling_mean = calloc(c, sizeof(float));
    layer.rolling_variance = calloc(c, sizeof(float));

    layer.forward = forward_batchnorm_layer;
    layer.backward = backward_batchnorm_layer;
#ifdef GPU
    layer.forward_gpu = forward_batchnorm_layer_gpu;
    layer.backward_gpu = backward_batchnorm_layer_gpu;

    layer.output_gpu =  cuda_make_array(layer.output, h * w * c * batch);
    layer.delta_gpu =   cuda_make_array(layer.delta, h * w * c * batch);


 src/classifier.c

@@ -10,6 +10,7 @@

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
image get_image_from_stream(CvCapture *cap);
#endif

list *read_data_cfg(char *filename)
@@ -57,25 +58,26 @@
#ifdef GPU
    int i;

    srand(time(0));
    float avg_loss = -1;
    char *base = basecfg(cfgfile);
    printf("%s\n", base);
    printf("%d\n", ngpus);
    network *nets = calloc(ngpus, sizeof(network));

    srand(time(0));
    int seed = rand();
    for(i = 0; i < ngpus; ++i){
        srand(seed);
        cuda_set_device(gpus[i]);
        nets[i] = parse_network_cfg(cfgfile);
        if(clear) *nets[i].seen = 0;
        if(weightfile){
            load_weights(&nets[i], weightfile);
        }
    }
    network net = nets[0];
    for(i = 0; i < ngpus; ++i){
        *nets[i].seen = *net.seen;
        if(clear) *nets[i].seen = 0;
        nets[i].learning_rate *= ngpus;
    }
    srand(time(0));
    network net = nets[0];

    int imgs = net.batch * net.subdivisions * ngpus;


 src/coco.c

@@ -12,14 +12,10 @@
#include "opencv2/highgui/highgui_c.h"
#endif

void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);

char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};

int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};

image coco_labels[80];

void train_coco(char *cfgfile, char *weightfile)
{
    //char *train_images = "/home/pjreddie/data/voc/test/train.txt";
@@ -160,7 +156,6 @@

    layer l = net.layers[net.n-1];
    int classes = l.classes;
    int square = l.sqrt;
    int side = l.side;

    int j;
@@ -217,10 +212,10 @@
            char *path = paths[i+t-nthreads];
            int image_id = get_coco_image_id(path);
            float *X = val_resized[t].data;
            float *predictions = network_predict(net, X);
            network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
            convert_detections(predictions, classes, l.n, square, side, w, h, thresh, probs, boxes, 0);
            get_detection_boxes(l, w, h, thresh, probs, boxes, 0);
            if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, iou_thresh);
            print_cocos(fp, image_id, boxes, probs, side*side*l.n, classes, w, h);
            free_image(val[t]);
@@ -250,7 +245,6 @@

    layer l = net.layers[net.n-1];
    int classes = l.classes;
    int square = l.sqrt;
    int side = l.side;

    int j, k;
@@ -282,14 +276,15 @@
        image orig = load_image_color(path, 0, 0);
        image sized = resize_image(orig, net.w, net.h);
        char *id = basecfg(path);
        float *predictions = network_predict(net, sized.data);
        convert_detections(predictions, classes, l.n, square, side, 1, 1, thresh, probs, boxes, 1);
        network_predict(net, sized.data);
        get_detection_boxes(l, 1, 1, thresh, probs, boxes, 1);
        if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms_thresh);

        char *labelpath = find_replace(path, "images", "labels");
        labelpath = find_replace(labelpath, "JPEGImages", "labels");
        labelpath = find_replace(labelpath, ".jpg", ".txt");
        labelpath = find_replace(labelpath, ".JPEG", ".txt");
        char labelpath[4096];
        find_replace(path, "images", "labels", labelpath);
        find_replace(labelpath, "JPEGImages", "labels", labelpath);
        find_replace(labelpath, ".jpg", ".txt", labelpath);
        find_replace(labelpath, ".JPEG", ".txt", labelpath);

        int num_labels = 0;
        box_label *truth = read_boxes(labelpath, &num_labels);
@@ -323,7 +318,7 @@

void test_coco(char *cfgfile, char *weightfile, char *filename, float thresh)
{

    image *alphabet = load_alphabet();
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
@@ -353,11 +348,11 @@
        image sized = resize_image(im, net.w, net.h);
        float *X = sized.data;
        time=clock();
        float *predictions = network_predict(net, X);
        network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
        get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0);
        if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
        draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, coco_labels, 80);
        draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, alphabet, 80);
        save_image(im, "prediction");
        show_image(im, "predictions");
        free_image(im);
@@ -372,12 +367,7 @@

void run_coco(int argc, char **argv)
{
    int i;
    for(i = 0; i < 80; ++i){
        char buff[256];
        sprintf(buff, "data/labels/%s.png", coco_classes[i]);
        coco_labels[i] = load_image_color(buff, 0, 0);
    }
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    float thresh = find_float_arg(argc, argv, "-thresh", .2);
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
@@ -394,5 +384,5 @@
    else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights);
    else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights);
    else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights);
    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, coco_labels, 80, frame_skip);
    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix);
}

 src/connected_layer.c

@@ -36,6 +36,10 @@
    l.weights = calloc(outputs*inputs, sizeof(float));
    l.biases = calloc(outputs, sizeof(float));

    l.forward = forward_connected_layer;
    l.backward = backward_connected_layer;
    l.update = update_connected_layer;

    //float scale = 1./sqrt(inputs);
    float scale = sqrt(2./inputs);
    for(i = 0; i < outputs*inputs; ++i){
@@ -66,6 +70,10 @@
    }

#ifdef GPU
    l.forward_gpu = forward_connected_layer_gpu;
    l.backward_gpu = backward_connected_layer_gpu;
    l.update_gpu = update_connected_layer_gpu;

    l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
    l.biases_gpu = cuda_make_array(l.biases, outputs);


 src/convolutional_layer.c

@@ -209,6 +209,9 @@
    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));

    l.forward = forward_convolutional_layer;
    l.backward = backward_convolutional_layer;
    l.update = update_convolutional_layer;
    if(binary){
        l.binary_weights = calloc(c*n*size*size, sizeof(float));
        l.cweights = calloc(c*n*size*size, sizeof(char));
@@ -234,6 +237,10 @@
    }

#ifdef GPU
    l.forward_gpu = forward_convolutional_layer_gpu;
    l.backward_gpu = backward_convolutional_layer_gpu;
    l.update_gpu = update_convolutional_layer_gpu;

    if(gpu_index >= 0){
        l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
        l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);

 src/cost_layer.c

@@ -43,7 +43,13 @@
    l.delta = calloc(inputs*batch, sizeof(float));
    l.output = calloc(inputs*batch, sizeof(float));
    l.cost = calloc(1, sizeof(float));

    l.forward = forward_cost_layer;
    l.backward = backward_cost_layer;
    #ifdef GPU
    l.forward_gpu = forward_cost_layer_gpu;
    l.backward_gpu = backward_cost_layer_gpu;

    l.delta_gpu = cuda_make_array(l.output, inputs*batch);
    l.output_gpu = cuda_make_array(l.delta, inputs*batch);
    #endif

 src/crnn_layer.c

@@ -64,7 +64,15 @@
    l.output = l.output_layer->output;
    l.delta = l.output_layer->delta;

    l.forward = forward_crnn_layer;
    l.backward = backward_crnn_layer;
    l.update = update_crnn_layer;

#ifdef GPU
    l.forward_gpu = forward_crnn_layer_gpu;
    l.backward_gpu = backward_crnn_layer_gpu;
    l.update_gpu = update_crnn_layer_gpu;

    l.state_gpu = cuda_make_array(l.state, l.hidden*batch*(steps+1));
    l.output_gpu = l.output_layer->output_gpu;
    l.delta_gpu = l.output_layer->delta_gpu;

 src/crop_layer.c

@@ -10,6 +10,9 @@
    return float_to_image(w,h,c,l.output);
}

// Backward pass for the crop layer. The body is empty: nothing is computed or
// written here.
void backward_crop_layer(const crop_layer l, network_state state){}
// GPU counterpart of the crop layer's backward pass. The body is empty as well.
void backward_crop_layer_gpu(const crop_layer l, network_state state){}

crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
{
    fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
@@ -30,7 +33,12 @@
    l.inputs = l.w * l.h * l.c;
    l.outputs = l.out_w * l.out_h * l.out_c;
    l.output = calloc(l.outputs*batch, sizeof(float));
    l.forward = forward_crop_layer;
    l.backward = backward_crop_layer;

    #ifdef GPU
    l.forward_gpu = forward_crop_layer_gpu;
    l.backward_gpu = backward_crop_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
    l.rand_gpu   = cuda_make_array(0, l.batch*8);
    #endif

 src/darknet.c

@@ -136,17 +136,6 @@
    save_weights_upto(net, outfile, max);
}

// Parse the network described by cfgfile, optionally load weightfile into it,
// and write it out to outfile via save_weights_double().
// gpu_index is set to -1 before parsing.
void stacked(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    // NOTE(review): other code in this change set writes `*net.seen = 0`,
    // i.e. `seen` looks like a pointer; this assignment would then null the
    // pointer rather than reset the counter -- confirm against network.h.
    net.seen = 0;
    save_weights_double(net, outfile);
}

#include "convolutional_layer.h"
void rescale_net(char *cfgfile, char *weightfile, char *outfile)
{
@@ -420,8 +409,6 @@
        partial(argv[2], argv[3], argv[4], atoi(argv[5]));
    } else if (0 == strcmp(argv[1], "average")){
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "stacked")){
        stacked(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "visualize")){
        visualize(argv[2], (argc > 3) ? argv[3] : 0);
    } else if (0 == strcmp(argv[1], "imtest")){

 src/data.c

@@ -47,7 +47,7 @@
    for(i = 0; i < n; ++i){
        int index = rand()%m;
        random_paths[i] = paths[index];
        if(i == 0) printf("%s\n", paths[index]);
        //if(i == 0) printf("%s\n", paths[index]);
    }
    pthread_mutex_unlock(&mutex);
    return random_paths;
@@ -58,7 +58,8 @@
    char **replace_paths = calloc(n, sizeof(char*));
    int i;
    for(i = 0; i < n; ++i){
        char *replaced = find_replace(paths[i], find, replace);
        char replaced[4096];
        find_replace(paths[i], find, replace, replaced);
        replace_paths[i] = copy_string(replaced);
    }
    return replace_paths;
@@ -198,12 +199,13 @@

void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy)
{
    char *labelpath = find_replace(path, "images", "labels");
    labelpath = find_replace(labelpath, "JPEGImages", "labels");
    char labelpath[4096];
    find_replace(path, "images", "labels", labelpath);
    find_replace(labelpath, "JPEGImages", "labels", labelpath);
    find_replace(labelpath, ".jpg", ".txt", labelpath);
    find_replace(labelpath, ".JPG", ".txt", labelpath);
    find_replace(labelpath, ".JPEG", ".txt", labelpath);

    labelpath = find_replace(labelpath, ".jpg", ".txt");
    labelpath = find_replace(labelpath, ".JPG", ".txt");
    labelpath = find_replace(labelpath, ".JPEG", ".txt");
    int count = 0;
    box_label *boxes = read_boxes(labelpath, &count);
    randomize_boxes(boxes, count);
@@ -235,13 +237,14 @@

void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy)
{
    char *labelpath = find_replace(path, "images", "labels");
    labelpath = find_replace(labelpath, "JPEGImages", "labels");
    char labelpath[4096];
    find_replace(path, "images", "labels", labelpath);
    find_replace(labelpath, "JPEGImages", "labels", labelpath);

    labelpath = find_replace(labelpath, ".jpg", ".txt");
    labelpath = find_replace(labelpath, ".png", ".txt");
    labelpath = find_replace(labelpath, ".JPG", ".txt");
    labelpath = find_replace(labelpath, ".JPEG", ".txt");
    find_replace(labelpath, ".jpg", ".txt", labelpath);
    find_replace(labelpath, ".png", ".txt", labelpath);
    find_replace(labelpath, ".JPG", ".txt", labelpath);
    find_replace(labelpath, ".JPEG", ".txt", labelpath);
    int count = 0;
    box_label *boxes = read_boxes(labelpath, &count);
    randomize_boxes(boxes, count);
@@ -282,13 +285,14 @@

void fill_truth_detection(char *path, int num_boxes, float *truth, int classes, int flip, float dx, float dy, float sx, float sy)
{
    char *labelpath = find_replace(path, "images", "labels");
    labelpath = find_replace(labelpath, "JPEGImages", "labels");
    char labelpath[4096];
    find_replace(path, "images", "labels", labelpath);
    find_replace(labelpath, "JPEGImages", "labels", labelpath);

    labelpath = find_replace(labelpath, ".jpg", ".txt");
    labelpath = find_replace(labelpath, ".png", ".txt");
    labelpath = find_replace(labelpath, ".JPG", ".txt");
    labelpath = find_replace(labelpath, ".JPEG", ".txt");
    find_replace(labelpath, ".jpg", ".txt", labelpath);
    find_replace(labelpath, ".png", ".txt", labelpath);
    find_replace(labelpath, ".JPG", ".txt", labelpath);
    find_replace(labelpath, ".JPEG", ".txt", labelpath);
    int count = 0;
    box_label *boxes = read_boxes(labelpath, &count);
    randomize_boxes(boxes, count);
@@ -400,11 +404,12 @@
    int i;
    int count = 0;
    for(i = 0; i < n; ++i){
        char *label = find_replace(paths[i], "imgs", "labels");
        label = find_replace(label, "_iconl.jpeg", ".txt");
        char label[4096];
        find_replace(paths[i], "imgs", "labels", label);
        find_replace(label, "_iconl.jpeg", ".txt", label);
        FILE *file = fopen(label, "r");
        if(!file){
            label = find_replace(label, "labels", "labels2");
            find_replace(label, "labels", "labels2", label);
            file = fopen(label, "r");
            if(!file) continue;
        }
@@ -518,16 +523,18 @@
        int id;
        float iou;

        char *imlabel1 = find_replace(paths[i*2],   "imgs", "labels");
        imlabel1 = find_replace(imlabel1, "jpg", "txt");
        char imlabel1[4096];
        char imlabel2[4096];
        find_replace(paths[i*2],   "imgs", "labels", imlabel1);
        find_replace(imlabel1, "jpg", "txt", imlabel1);
        FILE *fp1 = fopen(imlabel1, "r");

        while(fscanf(fp1, "%d %f", &id, &iou) == 2){
            if (d.y.vals[i][2*id] < iou) d.y.vals[i][2*id] = iou;
        }

        char *imlabel2 = find_replace(paths[i*2+1], "imgs", "labels");
        imlabel2 = find_replace(imlabel2, "jpg", "txt");
        find_replace(paths[i*2+1], "imgs", "labels", imlabel2);
        find_replace(imlabel2, "jpg", "txt", imlabel2);
        FILE *fp2 = fopen(imlabel2, "r");

        while(fscanf(fp2, "%d %f", &id, &iou) == 2){
@@ -709,6 +716,7 @@
{
    int i;
    load_args args = *(load_args *)ptr;
    if (args.threads == 0) args.threads = 1;
    data *out = args.d;
    int total = args.n;
    free(ptr);

 src/deconvolutional_layer.c

@@ -80,6 +80,10 @@
    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));

    l.forward = forward_deconvolutional_layer;
    l.backward = backward_deconvolutional_layer;
    l.update = update_deconvolutional_layer;

    #ifdef GPU
    l.weights_gpu = cuda_make_array(l.weights, c*n*size*size);
    l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size);

 src/demo.c

@@ -1,5 +1,6 @@
#include "network.h"
#include "detection_layer.h"
#include "region_layer.h"
#include "cost_layer.h"
#include "utils.h"
#include "parser.h"
@@ -13,10 +14,10 @@
#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#include "opencv2/imgproc/imgproc_c.h"
void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
image get_image_from_stream(CvCapture *cap);

static char **demo_names;
static image *demo_labels;
static image *demo_alphabet;
static int demo_classes;

static float **probs;
@@ -50,16 +51,23 @@
{
    float nms = .4;

    detection_layer l = net.layers[net.n-1];
    layer l = net.layers[net.n-1];
    float *X = det_s.data;
    float *prediction = network_predict(net, X);

    memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float));
    mean_arrays(predictions, FRAMES, l.outputs, avg);
    l.output = avg;

    free_image(det_s);
    convert_detections(avg, l.classes, l.n, l.sqrt, l.side, 1, 1, demo_thresh, probs, boxes, 0);
    if (nms > 0) do_nms(boxes, probs, l.side*l.side*l.n, l.classes, nms);
    if(l.type == DETECTION){
        get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
    } else if (l.type == REGION){
        get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
    } else {
        error("Last layer must produce detections\n");
    }
    if (nms > 0) do_nms(boxes, probs, l.w*l.h*l.n, l.classes, nms);
    printf("\033[2J");
    printf("\033[1;1H");
    printf("\nFPS:%.1f\n",fps);
@@ -69,7 +77,7 @@
    det = images[(demo_index + FRAMES/2 + 1)%FRAMES];
    demo_index = (demo_index + 1)%FRAMES;

    draw_detections(det, l.side*l.side*l.n, demo_thresh, boxes, probs, demo_names, demo_labels, demo_classes);
    draw_detections(det, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes);

    return 0;
}
@@ -83,12 +91,13 @@
    return (double)time.tv_sec + (double)time.tv_usec * .000001;
}

void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, image *labels, int classes, int frame_skip)
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix)
{
    //skip = frame_skip;
    image *alphabet = load_alphabet();
    int delay = frame_skip;
    demo_names = names;
    demo_labels = labels;
    demo_alphabet = alphabet;
    demo_classes = classes;
    demo_thresh = thresh;
    printf("Demo\n");
@@ -108,16 +117,16 @@

    if(!cap) error("Couldn't connect to webcam.\n");

    detection_layer l = net.layers[net.n-1];
    layer l = net.layers[net.n-1];
    int j;

    avg = (float *) calloc(l.outputs, sizeof(float));
    for(j = 0; j < FRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));
    for(j = 0; j < FRAMES; ++j) images[j] = make_image(1,1,3);

    boxes = (box *)calloc(l.side*l.side*l.n, sizeof(box));
    probs = (float **)calloc(l.side*l.side*l.n, sizeof(float *));
    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *));
    boxes = (box *)calloc(l.w*l.h*l.n, sizeof(box));
    probs = (float **)calloc(l.w*l.h*l.n, sizeof(float *));
    for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = (float *)calloc(l.classes, sizeof(float *));

    pthread_t fetch_thread;
    pthread_t detect_thread;
@@ -141,9 +150,11 @@
    }

    int count = 0;
    cvNamedWindow("Demo", CV_WINDOW_NORMAL); 
    cvMoveWindow("Demo", 0, 0);
    cvResizeWindow("Demo", 1352, 1013);
    if(!prefix){
        cvNamedWindow("Demo", CV_WINDOW_NORMAL); 
        cvMoveWindow("Demo", 0, 0);
        cvResizeWindow("Demo", 1352, 1013);
    }

    double before = get_wall_time();

@@ -153,7 +164,7 @@
            if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
            if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");

            if(1){
            if(!prefix){
                show_image(disp, "Demo");
                int c = cvWaitKey(1);
                if (c == 10){
@@ -164,7 +175,7 @@
                }
            }else{
                char buff[256];
                sprintf(buff, "/home/pjreddie/tmp/bag_%07d", count);
                sprintf(buff, "%s_%08d", prefix, count);
                save_image(disp, buff);
            }

@@ -201,7 +212,7 @@
    }
}
#else
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, image *labels, int classes, int frame_skip)
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix)
{
    fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
}

 src/demo.h

@@ -2,6 +2,6 @@
#define DEMO

#include "image.h"
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, image *labels, int classes, int frame_skip);
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix);

#endif

 src/detection_layer.c

@@ -30,7 +30,12 @@
    l.truths = l.side*l.side*(1+l.coords+l.classes);
    l.output = calloc(batch*l.outputs, sizeof(float));
    l.delta = calloc(batch*l.outputs, sizeof(float));

    l.forward = forward_detection_layer;
    l.backward = backward_detection_layer;
#ifdef GPU
    l.forward_gpu = forward_detection_layer_gpu;
    l.backward_gpu = backward_detection_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
#endif
@@ -216,6 +221,35 @@
    axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
}

/*
 * Decode the raw output of a detection layer into boxes and class scores.
 *
 * l.output is read as three consecutive regions (as implied by the index
 * arithmetic below):
 *   1. side*side*classes          class values, one run of `classes` per cell
 *   2. side*side*n                one scale value per predicted box
 *   3. side*side*n*4              x, y, w, h per predicted box
 *
 * For every cell/box pair the box is written to boxes[cell*n + b] and the
 * per-class score (scale * class value, zeroed when not above thresh) to
 * probs[cell*n + b][class].  x/y are offset by the cell column/row, divided
 * by l.side and multiplied by w/h; the width/height predictions are squared
 * first when l.sqrt is set.  When only_objectness is non-zero, slot 0 of
 * each probs row is overwritten with the raw scale value.
 */
void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
{
    const float *out = l.output;
    const int cells = l.side*l.side;
    const int scale_base = cells*l.classes;
    const int coord_base = cells*(l.classes + l.n);
    const int exponent = l.sqrt ? 2 : 1;
    int cell, b, c;

    for (cell = 0; cell < cells; ++cell){
        const int row = cell / l.side;
        const int col = cell % l.side;
        /* All boxes of a cell share the same run of class values. */
        const int class_base = cell*l.classes;

        for (b = 0; b < l.n; ++b){
            const int index = cell*l.n + b;
            const float scale = out[scale_base + index];
            const int coord = coord_base + index*4;

            boxes[index].x = (out[coord + 0] + col) / l.side * w;
            boxes[index].y = (out[coord + 1] + row) / l.side * h;
            boxes[index].w = pow(out[coord + 2], exponent) * w;
            boxes[index].h = pow(out[coord + 3], exponent) * h;

            for (c = 0; c < l.classes; ++c){
                float prob = scale*out[class_base + c];
                if (prob > thresh) probs[index][c] = prob;
                else probs[index][c] = 0;
            }

            if (only_objectness) probs[index][0] = scale;
        }
    }
}

#ifdef GPU

void forward_detection_layer_gpu(const detection_layer l, network_state state)

 src/detection_layer.h

@@ -9,6 +9,7 @@
detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore);
void forward_detection_layer(const detection_layer l, network_state state);
void backward_detection_layer(const detection_layer l, network_state state);
void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);

#ifdef GPU
void forward_detection_layer_gpu(const detection_layer l, network_state state);

 src/detector.c

@@ -1,16 +1,16 @@
#include "network.h"
#include "detection_layer.h"
#include "region_layer.h"
#include "cost_layer.h"
#include "utils.h"
#include "parser.h"
#include "box.h"
#include "demo.h"

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#endif

static char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
static image voc_labels[20];

void train_detector(char *cfgfile, char *weightfile)
{
@@ -49,13 +49,14 @@
    args.num_boxes = l.max_boxes;
    args.d = &buffer;
    args.type = DETECTION_DATA;
    args.threads = 4;

    args.angle = net.angle;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;

    pthread_t load_thread = load_data_in_thread(args);
    pthread_t load_thread = load_data(args);
    clock_t time;
    //while(i*imgs < N*120){
    while(get_current_batch(net) < net.max_batches){
@@ -63,7 +64,7 @@
        time=clock();
        pthread_join(load_thread, 0);
        train = buffer;
        load_thread = load_data_in_thread(args);
        load_thread = load_data(args);

/*
        int k;
@@ -102,44 +103,6 @@
    save_weights(net, buff);
}

/*
 * Decode a flat prediction buffer into boxes and per-class scores.
 *
 * Each predicted box occupies (classes + 5) consecutive floats:
 *   [0..3] x, y, w, h   [4] scale   [5..] one value per class
 *
 * Box centres are the cell centre plus half of the predicted offset, and the
 * sizes are exp(prediction) * .5, all scaled by w / h.  Scores are
 * scale * class value, zeroed when not above thresh.  When only_objectness is
 * non-zero, slot 0 of each probs row is overwritten with the raw scale.
 *
 * `square` is kept for signature compatibility only: the code path that used
 * it was always overwritten by the exp()-based decoding and has been removed
 * together with the unreachable `if(0)` branch.
 * NOTE(review): this assumes logistic_activate() has no side effects, so
 * dropping the overwritten calls does not change results.
 */
static void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
{
    int i,j,n;
    (void)square;
    for (i = 0; i < side*side; ++i){
        int row = i / side;
        int col = i % side;
        for(n = 0; n < num; ++n){
            int index = i*num + n;
            int box_index = index * (classes + 5);
            int class_index = box_index + 5;
            float scale = predictions[box_index + 4];

            boxes[index].x = ((col + .5)/side + predictions[box_index + 0] * .5) * w;
            boxes[index].y = ((row + .5)/side + predictions[box_index + 1] * .5) * h;
            boxes[index].w = (exp(predictions[box_index + 2]) * .5) * w;
            boxes[index].h = (exp(predictions[box_index + 3]) * .5) * h;

            for(j = 0; j < classes; ++j){
                float prob = scale*predictions[class_index+j];
                probs[index][j] = (prob > thresh) ? prob : 0;
            }
            if(only_objectness){
                probs[index][0] = scale;
            }
        }
    }
}

void print_detector_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h)
{
    int i, j;
@@ -179,7 +142,6 @@

    layer l = net.layers[net.n-1];
    int classes = l.classes;
    int side = l.w;

    int j;
    FILE **fps = calloc(classes, sizeof(FILE *));
@@ -188,9 +150,9 @@
        snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
        fps[j] = fopen(buff, "w");
    }
    box *boxes = calloc(side*side*l.n, sizeof(box));
    float **probs = calloc(side*side*l.n, sizeof(float *));
    for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
    box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
    float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
    for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));

    int m = plist->size;
    int i=0;
@@ -235,12 +197,12 @@
            char *path = paths[i+t-nthreads];
            char *id = basecfg(path);
            float *X = val_resized[t].data;
            float *predictions = network_predict(net, X);
            network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
            convert_detections(predictions, classes, l.n, 0, side, w, h, thresh, probs, boxes, 0);
            if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, nms);
            print_detector_detections(fps, id, boxes, probs, side*side*l.n, classes, w, h);
            get_region_boxes(l, w, h, thresh, probs, boxes, 0);
            if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, classes, nms);
            print_detector_detections(fps, id, boxes, probs, l.w*l.h*l.n, classes, w, h);
            free(id);
            free_image(val[t]);
            free_image(val_resized[t]);
@@ -268,8 +230,6 @@

    layer l = net.layers[net.n-1];
    int classes = l.classes;
    int square = l.sqrt;
    int side = l.side;

    int j, k;
    FILE **fps = calloc(classes, sizeof(FILE *));
@@ -278,9 +238,9 @@
        snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
        fps[j] = fopen(buff, "w");
    }
    box *boxes = calloc(side*side*l.n, sizeof(box));
    float **probs = calloc(side*side*l.n, sizeof(float *));
    for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
    box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
    float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
    for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));

    int m = plist->size;
    int i=0;
@@ -299,18 +259,19 @@
        image orig = load_image_color(path, 0, 0);
        image sized = resize_image(orig, net.w, net.h);
        char *id = basecfg(path);
        float *predictions = network_predict(net, sized.data);
        convert_detections(predictions, classes, l.n, square, l.w, 1, 1, thresh, probs, boxes, 1);
        if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms);
        network_predict(net, sized.data);
        get_region_boxes(l, 1, 1, thresh, probs, boxes, 1);
        if (nms) do_nms(boxes, probs, l.w*l.h*l.n, 1, nms);

        char *labelpath = find_replace(path, "images", "labels");
        labelpath = find_replace(labelpath, "JPEGImages", "labels");
        labelpath = find_replace(labelpath, ".jpg", ".txt");
        labelpath = find_replace(labelpath, ".JPEG", ".txt");
        char labelpath[4096];
        find_replace(path, "images", "labels", labelpath);
        find_replace(labelpath, "JPEGImages", "labels", labelpath);
        find_replace(labelpath, ".jpg", ".txt", labelpath);
        find_replace(labelpath, ".JPEG", ".txt", labelpath);

        int num_labels = 0;
        box_label *truth = read_boxes(labelpath, &num_labels);
        for(k = 0; k < side*side*l.n; ++k){
        for(k = 0; k < l.w*l.h*l.n; ++k){
            if(probs[k][0] > thresh){
                ++proposals;
            }
@@ -319,7 +280,7 @@
            ++total;
            box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
            float best_iou = 0;
            for(k = 0; k < side*side*l.n; ++k){
            for(k = 0; k < l.w*l.h*l.n; ++k){
                float iou = box_iou(boxes[k], t);
                if(probs[k][0] > thresh && iou > best_iou){
                    best_iou = iou;
@@ -340,13 +301,12 @@

void test_detector(char *cfgfile, char *weightfile, char *filename, float thresh)
{

    image *alphabet = load_alphabet();
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    detection_layer l = net.layers[net.n-1];
    l.side = l.w;
    layer l = net.layers[net.n-1];
    set_batch_network(&net, 1);
    srand(2222222);
    clock_t time;
@@ -354,9 +314,9 @@
    char *input = buff;
    int j;
    float nms=.4;
    box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
    float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
    box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
    float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
    for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
    while(1){
        if(filename){
            strncpy(input, filename, 256);
@@ -371,12 +331,12 @@
        image sized = resize_image(im, net.w, net.h);
        float *X = sized.data;
        time=clock();
        float *predictions = network_predict(net, X);
        network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        convert_detections(predictions, l.classes, l.n, 0, l.w, 1, 1, thresh, probs, boxes, 0);
        if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
        //draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
        draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
        get_region_boxes(l, 1, 1, thresh, probs, boxes, 0);
        if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        //draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
        draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, voc_names, alphabet, 20);
        save_image(im, "predictions");
        show_image(im, "predictions");

@@ -392,14 +352,10 @@

void run_detector(int argc, char **argv)
{
    int i;
    for(i = 0; i < 20; ++i){
        char buff[256];
        sprintf(buff, "data/labels/%s.png", voc_names[i]);
        voc_labels[i] = load_image_color(buff, 0, 0);
    }

    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    float thresh = find_float_arg(argc, argv, "-thresh", .2);
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
    if(argc < 4){
        fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
        return;
@@ -412,4 +368,5 @@
    else if(0==strcmp(argv[2], "train")) train_detector(cfg, weights);
    else if(0==strcmp(argv[2], "valid")) validate_detector(cfg, weights);
    else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix);
}

 src/dropout_layer.c

@@ -15,7 +15,11 @@
    l.batch = batch;
    l.rand = calloc(inputs*batch, sizeof(float));
    l.scale = 1./(1.-probability);
    l.forward = forward_dropout_layer;
    l.backward = backward_dropout_layer;
    #ifdef GPU
    l.forward_gpu = forward_dropout_layer_gpu;
    l.backward_gpu = backward_dropout_layer_gpu;
    l.rand_gpu = cuda_make_array(l.rand, inputs*batch);
    #endif
    return l;

 src/gru_layer.c

@@ -85,7 +85,15 @@
    l.z_cpu = calloc(outputs*batch, sizeof(float));
    l.h_cpu = calloc(outputs*batch, sizeof(float));

    l.forward = forward_gru_layer;
    l.backward = backward_gru_layer;
    l.update = update_gru_layer;

#ifdef GPU
    l.forward_gpu = forward_gru_layer_gpu;
    l.backward_gpu = backward_gru_layer_gpu;
    l.update_gpu = update_gru_layer_gpu;

    l.forgot_state_gpu = cuda_make_array(l.output, batch*outputs);
    l.forgot_delta_gpu = cuda_make_array(l.output, batch*outputs);
    l.prev_state_gpu = cuda_make_array(l.output, batch*outputs);

 src/gru_layer.h

@@ -1,24 +1,23 @@

#ifndef RNN_LAYER_H
#define RNN_LAYER_H
#ifndef GRU_LAYER_H
#define GRU_LAYER_H

#include "activations.h"
#include "layer.h"
#include "network.h"
#define USET

layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log);
layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);

void forward_rnn_layer(layer l, network_state state);
void backward_rnn_layer(layer l, network_state state);
void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay);
void forward_gru_layer(layer l, network_state state);
void backward_gru_layer(layer l, network_state state);
void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay);

#ifdef GPU
void forward_rnn_layer_gpu(layer l, network_state state);
void backward_rnn_layer_gpu(layer l, network_state state);
void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void push_rnn_layer(layer l);
void pull_rnn_layer(layer l);
void forward_gru_layer_gpu(layer l, network_state state);
void backward_gru_layer_gpu(layer l, network_state state);
void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void push_gru_layer(layer l);
void pull_gru_layer(layer l);
#endif

#endif

 src/image.c

@@ -10,6 +10,12 @@
#define STB_IMAGE_WRITE_IMPLEMENTATION
#include "stb_image_write.h"

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#endif


int windows = 0;

float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} };
@@ -25,10 +31,66 @@
    return r;
}

/*
 * Multiply `source` into `dest` at offset (dx, dy).
 *
 * For every pixel of source, the value currently read from dest at the
 * shifted position (via get_pixel_extend) is multiplied by the source value
 * and written back with set_pixel.  dest is modified in place.
 */
void composite_image(image source, image dest, int dx, int dy)
{
    int ch, row, col;
    for(ch = 0; ch < source.c; ++ch){
        for(row = 0; row < source.h; ++row){
            const int ty = dy + row;
            for(col = 0; col < source.w; ++col){
                const int tx = dx + col;
                float src = get_pixel(source, col, row, ch);
                float dst = get_pixel_extend(dest, tx, ty, ch);
                set_pixel(dest, tx, ty, ch, src * dst);
            }
        }
    }
}

/*
 * Return a new image that is `a` surrounded by `border` extra pixels on every
 * side.  Each output pixel is read from `a` at the position shifted back by
 * `border`; positions that fall outside `a` take whatever get_pixel_extend
 * yields for out-of-range coordinates.  The caller owns the returned image.
 */
image border_image(image a, int border)
{
    image padded = make_image(a.w + 2*border, a.h + 2*border, a.c);
    int ch, row, col;
    for(ch = 0; ch < padded.c; ++ch){
        for(row = 0; row < padded.h; ++row){
            const int src_row = row - border;
            for(col = 0; col < padded.w; ++col){
                set_pixel(padded, col, row, ch, get_pixel_extend(a, col - border, src_row, ch));
            }
        }
    }
    return padded;
}

/*
 * Place `b` to the right of `a`, separated by `dx` pixels (dx may be
 * negative to overlap), and return the result as a new image.
 *
 * The canvas takes the larger height and channel count of the two inputs and
 * is filled with 1 before `a` is embedded at the origin and `b` is
 * composited at x = a.w + dx.  When `a` has zero width a copy of `b` is
 * returned instead.
 */
image tile_images(image a, image b, int dx)
{
    if(a.w == 0) return copy_image(b);

    int height = a.h;
    if(b.h >= a.h) height = b.h;
    int channels = a.c;
    if(b.c >= a.c) channels = b.c;

    image canvas = make_image(a.w + b.w + dx, height, channels);
    fill_cpu(canvas.w*canvas.h*canvas.c, 1, canvas.data, 1);
    embed_image(a, canvas, 0, 0);
    composite_image(b, canvas, a.w + dx, 0);
    return canvas;
}

/*
 * Render `string` as an image by tiling one glyph per character.
 *
 * characters: glyph table indexed by character code; the table built by
 *             load_alphabet() holds 128 entries.
 * string:     NUL-terminated text to render.
 *
 * Glyphs are tiled left to right with a 4-pixel overlap, then the result is
 * wrapped in a border of a quarter of its height.  The caller owns the
 * returned image and must release it with free_image().
 */
image get_label(image *characters, char *string)
{
    image label = make_empty_image(0,0,0);
    for(; *string; ++string){
        /* plain char may be signed: go through unsigned char so bytes
           >= 0x80 never produce a negative index */
        unsigned char code = (unsigned char)*string;
        /* no glyph exists beyond the 128-entry table; skip such bytes
           instead of reading out of bounds */
        if(code >= 128) continue;
        image n = tile_images(label, characters[code], -4);
        free_image(label);
        label = n;
    }
    image b = border_image(label, label.h*.25);
    free_image(label);
    return b;
}

void draw_label(image a, int r, int c, image label, const float *rgb)
{
    float ratio = (float) label.w / label.h;
    int h = label.h;
    int h = a.h * .04;
    h = label.h;
    h = a.h * .06;
    int w = ratio * h;
    image rl = resize_image(label, w, h);
    if (r - h >= 0) r = r - h;
@@ -102,7 +164,19 @@
    }
}

void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes)
/*
 * Load the glyph images used to draw text labels.
 *
 * Returns a heap-allocated table of 128 images indexed by character code.
 * Codes 32 through 126 are loaded from "data/labels/<code>.png"; every other
 * entry stays zero-initialised.
 */
image *load_alphabet()
{
    image *glyphs = calloc(128, sizeof(image));
    int code = 32;
    while(code < 127){
        char path[256];
        sprintf(path, "data/labels/%d.png", code);
        glyphs[code] = load_image_color(path, 0, 0);
        ++code;
    }
    return glyphs;
}

void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *alphabet, int classes)
{
    int i;

@@ -111,7 +185,7 @@
        float prob = probs[i][class];
        if(prob > thresh){
            //int width = pow(prob, 1./2.)*30+1;
            int width = 8;
            int width = im.h * .012;
            printf("%s: %.0f%%\n", names[class], prob*100);
            int offset = class*1 % classes;
            float red = get_color(2,offset,classes);
@@ -137,7 +211,10 @@
            if(bot > im.h-1) bot = im.h-1;

            draw_box_width(im, left, top, right, bot, width, red, green, blue);
            if (labels) draw_label(im, top + width, left, labels[class], rgb);
            if (alphabet) {
                image label = get_label(alphabet, names[class]);
                draw_label(im, top + width, left, label, rgb);
            }
        }
    }
}
@@ -368,6 +445,53 @@
}

#ifdef OPENCV

/*
 * Convert an OpenCV IplImage into a darknet image.
 *
 * The source bytes are read as rows of `widthStep` bytes with interleaved
 * channels and written out channel by channel (all rows of channel 0, then
 * channel 1, ...), each value divided by 255.  Channel order is kept as is.
 */
image ipl_to_image(IplImage* src)
{
    const unsigned char *pixels = (unsigned char *)src->imageData;
    const int rows = src->height;
    const int cols = src->width;
    const int chans = src->nChannels;
    const int stride = src->widthStep;
    image out = make_image(cols, rows, chans);
    float *dst = out.data;
    int ch, y, x;

    for(ch = 0; ch < chans; ++ch){
        for(y = 0; y < rows; ++y){
            const unsigned char *line = pixels + y*stride;
            for(x = 0; x < cols; ++x){
                *dst++ = line[x*chans + ch]/255.;
            }
        }
    }
    return out;
}

/*
 * Load an image file through OpenCV and return it as a darknet image.
 *
 * channels: 0 loads the file as stored, 1 forces one channel, 3 forces three
 *           channels.  Any other value prints a warning and loads as stored.
 *
 * If the file cannot be loaded, a message is printed to stderr, the filename
 * is appended to "bad.list" and a 10x10x3 placeholder image is returned so
 * that callers can continue.  On success the IplImage is converted, released
 * and passed through rgbgr_image() before being returned.
 */
image load_image_cv(char *filename, int channels)
{
    IplImage* src = 0;
    int flag = -1;
    if (channels == 0) flag = -1;
    else if (channels == 1) flag = 0;
    else if (channels == 3) flag = 1;
    else {
        fprintf(stderr, "OpenCV can't force load with %d channels\n", channels);
    }

    if( (src = cvLoadImage(filename, flag)) == 0 )
    {
        fprintf(stderr, "Cannot load image \"%s\"\n", filename);
        /* Append directly to the file: the previous sprintf()+system("echo")
           overflowed a 256-byte buffer on long paths and handed the
           unescaped filename to a shell. */
        FILE *bad = fopen("bad.list", "a");
        if(bad){
            fprintf(bad, "%s\n", filename);
            fclose(bad);
        }
        return make_image(10,10,3);
    }
    image out = ipl_to_image(src);
    cvReleaseImage(&src);
    rgbgr_image(out);
    return out;
}

image get_image_from_stream(CvCapture *cap)
{
    IplImage* src = cvQueryFrame(cap);
@@ -376,9 +500,7 @@
    rgbgr_image(im);
    return im;
}
#endif

#ifdef OPENCV
void save_image_jpg(image p, const char *name)
{
    image copy = copy_image(p);
@@ -980,7 +1102,7 @@
        image aug = random_augment_image(im, 0, 320, 448, 320, .75);
        show_image(aug, "aug");
        free_image(aug);
        


        float exposure = 1.15;
        float saturation = 1.15;
@@ -1001,55 +1123,6 @@
#endif
}

#ifdef OPENCV
/*
 * Convert an OpenCV IplImage into a darknet image.
 *
 * The source bytes are read as rows of `widthStep` bytes with interleaved
 * channels and written out channel by channel (all rows of channel 0, then
 * channel 1, ...), each value divided by 255.  Channel order is kept as is.
 */
image ipl_to_image(IplImage* src)
{
    const unsigned char *pixels = (unsigned char *)src->imageData;
    const int rows = src->height;
    const int cols = src->width;
    const int chans = src->nChannels;
    const int stride = src->widthStep;
    image out = make_image(cols, rows, chans);
    float *dst = out.data;
    int ch, y, x;

    for(ch = 0; ch < chans; ++ch){
        for(y = 0; y < rows; ++y){
            const unsigned char *line = pixels + y*stride;
            for(x = 0; x < cols; ++x){
                *dst++ = line[x*chans + ch]/255.;
            }
        }
    }
    return out;
}

/*
 * Load an image file through OpenCV and return it as a darknet image.
 *
 * channels: 0 loads the file as stored, 1 forces one channel, 3 forces three
 *           channels.  Any other value prints a warning and loads as stored.
 *
 * If the file cannot be loaded, a message is printed to stderr, the filename
 * is appended to "bad.list" and a 10x10x3 placeholder image is returned so
 * that callers can continue.  On success the IplImage is converted, released
 * and passed through rgbgr_image() before being returned.
 */
image load_image_cv(char *filename, int channels)
{
    IplImage* src = 0;
    int flag = -1;
    if (channels == 0) flag = -1;
    else if (channels == 1) flag = 0;
    else if (channels == 3) flag = 1;
    else {
        fprintf(stderr, "OpenCV can't force load with %d channels\n", channels);
    }

    if( (src = cvLoadImage(filename, flag)) == 0 )
    {
        fprintf(stderr, "Cannot load image \"%s\"\n", filename);
        /* Append directly to the file: the previous sprintf()+system("echo")
           overflowed a 256-byte buffer on long paths and handed the
           unescaped filename to a shell. */
        FILE *bad = fopen("bad.list", "a");
        if(bad){
            fprintf(bad, "%s\n", filename);
            fclose(bad);
        }
        return make_image(10,10,3);
    }
    image out = ipl_to_image(src);
    cvReleaseImage(&src);
    rgbgr_image(out);
    return out;
}

#endif


image load_image_stb(char *filename, int channels)
{
@@ -1122,6 +1195,7 @@
}
/*
 * Store `val` at column x, row y, channel c of image m.
 * Coordinates outside the image are ignored silently; the data layout is
 * channel-major, then row, then column.
 */
void set_pixel(image m, int x, int y, int c, float val)
{
    const int inside = x >= 0 && y >= 0 && c >= 0
                    && x < m.w && y < m.h && c < m.c;
    if (!inside) return;
    /* Cannot fire after the guard above; kept as a sanity check. */
    assert(x < m.w && y < m.h && c < m.c);
    m.data[c*m.h*m.w + y*m.w + x] = val;
}
@@ -1247,5 +1321,7 @@

void free_image(image m)
{
    free(m.data);
    if(m.data){
        free(m.data);
    }
}

 src/image.h

@@ -8,11 +8,6 @@
#include <math.h>
#include "box.h"

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
#include "opencv2/imgproc/imgproc_c.h"
#endif

typedef struct {
    int h;
    int w;
@@ -26,6 +21,7 @@
void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
void draw_bbox(image a, box bbox, int w, float r, float g, float b);
void draw_label(image a, int r, int c, image label, const float *rgb);
void write_label(image a, int r, int c, image *characters, char *string, float *rgb);
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image *labels, int classes);
image image_distance(image a, image b);
void scale_image(image m, float s);
@@ -64,12 +60,6 @@
void show_image_layers(image p, char *name);
void show_image_collapsed(image p, char *name);

#ifdef OPENCV
void save_image_jpg(image p, const char *name);
image get_image_from_stream(CvCapture *cap);
image ipl_to_image(IplImage* src);
#endif

void print_image(image m);

image make_image(int w, int h, int c);
@@ -79,6 +69,7 @@
image copy_image(image p);
image load_image(char *filename, int w, int h, int c);
image load_image_color(char *filename, int w, int h);
image *load_alphabet();

float get_pixel(image m, int x, int y, int c);
float get_pixel_extend(image m, int x, int y, int c);

 src/layer.h

@@ -4,6 +4,8 @@
#include "activations.h"
#include "stddef.h"

struct network_state;

struct layer;
typedef struct layer layer;

@@ -42,6 +44,12 @@
    LAYER_TYPE type;
    ACTIVATION activation;
    COST_TYPE cost_type;
    void (*forward)   (struct layer, struct network_state);
    void (*backward)  (struct layer, struct network_state);
    void (*update)    (struct layer, int, float, float, float);
    void (*forward_gpu)   (struct layer, struct network_state);
    void (*backward_gpu)  (struct layer, struct network_state);
    void (*update_gpu)    (struct layer, int, float, float, float);
    int batch_normalize;
    int shortcut;
    int batch;

 src/local_layer.c

@@ -60,8 +60,16 @@
    l.col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
    l.output = calloc(l.batch*out_h * out_w * n, sizeof(float));
    l.delta  = calloc(l.batch*out_h * out_w * n, sizeof(float));
    
    l.forward = forward_local_layer;
    l.backward = backward_local_layer;
    l.update = update_local_layer;

#ifdef GPU
    l.forward_gpu = forward_local_layer_gpu;
    l.backward_gpu = backward_local_layer_gpu;
    l.update_gpu = update_local_layer_gpu;

    l.weights_gpu = cuda_make_array(l.weights, c*n*size*size*locations);
    l.weight_updates_gpu = cuda_make_array(l.weight_updates, c*n*size*size*locations);


 src/maxpool_layer.c

@@ -39,7 +39,11 @@
    l.indexes = calloc(output_size, sizeof(int));
    l.output =  calloc(output_size, sizeof(float));
    l.delta =   calloc(output_size, sizeof(float));
    l.forward = forward_maxpool_layer;
    l.backward = backward_maxpool_layer;
    #ifdef GPU
    l.forward_gpu = forward_maxpool_layer_gpu;
    l.backward_gpu = backward_maxpool_layer_gpu;
    l.indexes_gpu = cuda_make_int_array(output_size);
    l.output_gpu  = cuda_make_array(l.output, output_size);
    l.delta_gpu   = cuda_make_array(l.delta, output_size);

 src/network.c

@@ -15,7 +15,6 @@
#include "local_layer.h"
#include "convolutional_layer.h"
#include "activation_layer.h"
#include "deconvolutional_layer.h"
#include "detection_layer.h"
#include "region_layer.h"
#include "normalization_layer.h"
@@ -153,49 +152,7 @@
        if(l.delta){
            scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
        }
        if(l.type == CONVOLUTIONAL){
            forward_convolutional_layer(l, state);
        } else if(l.type == DECONVOLUTIONAL){
            forward_deconvolutional_layer(l, state);
        } else if(l.type == ACTIVE){
            forward_activation_layer(l, state);
        } else if(l.type == LOCAL){
            forward_local_layer(l, state);
        } else if(l.type == NORMALIZATION){
            forward_normalization_layer(l, state);
        } else if(l.type == BATCHNORM){
            forward_batchnorm_layer(l, state);
        } else if(l.type == DETECTION){
            forward_detection_layer(l, state);
        } else if(l.type == REGION){
            forward_region_layer(l, state);
        } else if(l.type == CONNECTED){
            forward_connected_layer(l, state);
        } else if(l.type == RNN){
            forward_rnn_layer(l, state);
        } else if(l.type == GRU){
            forward_gru_layer(l, state);
        } else if(l.type == CRNN){
            forward_crnn_layer(l, state);
        } else if(l.type == CROP){
            forward_crop_layer(l, state);
        } else if(l.type == COST){
            forward_cost_layer(l, state);
        } else if(l.type == SOFTMAX){
            forward_softmax_layer(l, state);
        } else if(l.type == MAXPOOL){
            forward_maxpool_layer(l, state);
        } else if(l.type == REORG){
            forward_reorg_layer(l, state);
        } else if(l.type == AVGPOOL){
            forward_avgpool_layer(l, state);
        } else if(l.type == DROPOUT){
            forward_dropout_layer(l, state);
        } else if(l.type == ROUTE){
            forward_route_layer(l, net);
        } else if(l.type == SHORTCUT){
            forward_shortcut_layer(l, state);
        }
        l.forward(l, state);
        state.input = l.output;
    }
}
@@ -207,29 +164,17 @@
    float rate = get_current_rate(net);
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.type == CONVOLUTIONAL){
            update_convolutional_layer(l, update_batch, rate, net.momentum, net.decay);
        } else if(l.type == DECONVOLUTIONAL){
            update_deconvolutional_layer(l, rate, net.momentum, net.decay);
        } else if(l.type == CONNECTED){
            update_connected_layer(l, update_batch, rate, net.momentum, net.decay);
        } else if(l.type == RNN){
            update_rnn_layer(l, update_batch, rate, net.momentum, net.decay);
        } else if(l.type == GRU){
            update_gru_layer(l, update_batch, rate, net.momentum, net.decay);
        } else if(l.type == CRNN){
            update_crnn_layer(l, update_batch, rate, net.momentum, net.decay);
        } else if(l.type == LOCAL){
            update_local_layer(l, update_batch, rate, net.momentum, net.decay);
        if(l.update){
            l.update(l, update_batch, rate, net.momentum, net.decay);
        }
    }
}

float *get_network_output(network net)
{
    #ifdef GPU
        if (gpu_index >= 0) return get_network_output_gpu(net);
    #endif 
#ifdef GPU
    if (gpu_index >= 0) return get_network_output_gpu(net);
#endif 
    int i;
    for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break;
    return net.layers[i].output;
@@ -273,47 +218,7 @@
            state.delta = prev.delta;
        }
        layer l = net.layers[i];
        if(l.type == CONVOLUTIONAL){
            backward_convolutional_layer(l, state);
        } else if(l.type == DECONVOLUTIONAL){
            backward_deconvolutional_layer(l, state);
        } else if(l.type == ACTIVE){
            backward_activation_layer(l, state);
        } else if(l.type == NORMALIZATION){
            backward_normalization_layer(l, state);
        } else if(l.type == BATCHNORM){
            backward_batchnorm_layer(l, state);
        } else if(l.type == MAXPOOL){
            if(i != 0) backward_maxpool_layer(l, state);
        } else if(l.type == REORG){
            backward_reorg_layer(l, state);
        } else if(l.type == AVGPOOL){
            backward_avgpool_layer(l, state);
        } else if(l.type == DROPOUT){
            backward_dropout_layer(l, state);
        } else if(l.type == DETECTION){
            backward_detection_layer(l, state);
        } else if(l.type == REGION){
            backward_region_layer(l, state);
        } else if(l.type == SOFTMAX){
            if(i != 0) backward_softmax_layer(l, state);
        } else if(l.type == CONNECTED){
            backward_connected_layer(l, state);
        } else if(l.type == RNN){
            backward_rnn_layer(l, state);
        } else if(l.type == GRU){
            backward_gru_layer(l, state);
        } else if(l.type == CRNN){
            backward_crnn_layer(l, state);
        } else if(l.type == LOCAL){
            backward_local_layer(l, state);
        } else if(l.type == COST){
            backward_cost_layer(l, state);
        } else if(l.type == ROUTE){
            backward_route_layer(l, net);
        } else if(l.type == SHORTCUT){
            backward_shortcut_layer(l, state);
        }
        l.backward(l, state);
    }
}

@@ -406,11 +311,11 @@
    int i;
    for(i = 0; i < net->n; ++i){
        net->layers[i].batch = b;
        #ifdef CUDNN
#ifdef CUDNN
        if(net->layers[i].type == CONVOLUTIONAL){
            cudnn_convolutional_setup(net->layers + i);
        }
        #endif
#endif
    }
}


 src/network_kernels.cu

@@ -22,7 +22,6 @@
#include "region_layer.h"
#include "convolutional_layer.h"
#include "activation_layer.h"
#include "deconvolutional_layer.h"
#include "maxpool_layer.h"
#include "reorg_layer.h"
#include "avgpool_layer.h"
@@ -51,49 +50,7 @@
        if(l.delta_gpu){
            fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
        }
        if(l.type == CONVOLUTIONAL){
            forward_convolutional_layer_gpu(l, state);
        } else if(l.type == DECONVOLUTIONAL){
            forward_deconvolutional_layer_gpu(l, state);
        } else if(l.type == ACTIVE){
            forward_activation_layer_gpu(l, state);
        } else if(l.type == LOCAL){
            forward_local_layer_gpu(l, state);
        } else if(l.type == DETECTION){
            forward_detection_layer_gpu(l, state);
        } else if(l.type == REGION){
            forward_region_layer_gpu(l, state);
        } else if(l.type == CONNECTED){
            forward_connected_layer_gpu(l, state);
        } else if(l.type == RNN){
            forward_rnn_layer_gpu(l, state);
        } else if(l.type == GRU){
            forward_gru_layer_gpu(l, state);
        } else if(l.type == CRNN){
            forward_crnn_layer_gpu(l, state);
        } else if(l.type == CROP){
            forward_crop_layer_gpu(l, state);
        } else if(l.type == COST){
            forward_cost_layer_gpu(l, state);
        } else if(l.type == SOFTMAX){
            forward_softmax_layer_gpu(l, state);
        } else if(l.type == NORMALIZATION){
            forward_normalization_layer_gpu(l, state);
        } else if(l.type == BATCHNORM){
            forward_batchnorm_layer_gpu(l, state);
        } else if(l.type == MAXPOOL){
            forward_maxpool_layer_gpu(l, state);
        } else if(l.type == REORG){
            forward_reorg_layer_gpu(l, state);
        } else if(l.type == AVGPOOL){
            forward_avgpool_layer_gpu(l, state);
        } else if(l.type == DROPOUT){
            forward_dropout_layer_gpu(l, state);
        } else if(l.type == ROUTE){
            forward_route_layer_gpu(l, net);
        } else if(l.type == SHORTCUT){
            forward_shortcut_layer_gpu(l, state);
        }
        l.forward_gpu(l, state);
        state.input = l.output_gpu;
    }
}
@@ -115,47 +72,7 @@
            state.input = prev.output_gpu;
            state.delta = prev.delta_gpu;
        }
        if(l.type == CONVOLUTIONAL){
            backward_convolutional_layer_gpu(l, state);
        } else if(l.type == DECONVOLUTIONAL){
            backward_deconvolutional_layer_gpu(l, state);
        } else if(l.type == ACTIVE){
            backward_activation_layer_gpu(l, state);
        } else if(l.type == LOCAL){
            backward_local_layer_gpu(l, state);
        } else if(l.type == MAXPOOL){
            if(i != 0) backward_maxpool_layer_gpu(l, state);
        } else if(l.type == REORG){
            backward_reorg_layer_gpu(l, state);
        } else if(l.type == AVGPOOL){
            if(i != 0) backward_avgpool_layer_gpu(l, state);
        } else if(l.type == DROPOUT){
            backward_dropout_layer_gpu(l, state);
        } else if(l.type == DETECTION){
            backward_detection_layer_gpu(l, state);
        } else if(l.type == REGION){
            backward_region_layer_gpu(l, state);
        } else if(l.type == NORMALIZATION){
            backward_normalization_layer_gpu(l, state);
        } else if(l.type == BATCHNORM){
            backward_batchnorm_layer_gpu(l, state);
        } else if(l.type == SOFTMAX){
            if(i != 0) backward_softmax_layer_gpu(l, state);
        } else if(l.type == CONNECTED){
            backward_connected_layer_gpu(l, state);
        } else if(l.type == RNN){
            backward_rnn_layer_gpu(l, state);
        } else if(l.type == GRU){
            backward_gru_layer_gpu(l, state);
        } else if(l.type == CRNN){
            backward_crnn_layer_gpu(l, state);
        } else if(l.type == COST){
            backward_cost_layer_gpu(l, state);
        } else if(l.type == ROUTE){
            backward_route_layer_gpu(l, net);
        } else if(l.type == SHORTCUT){
            backward_shortcut_layer_gpu(l, state);
        }
        l.backward_gpu(l, state);
    }
}

@@ -166,20 +83,8 @@
    float rate = get_current_rate(net);
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.type == CONVOLUTIONAL){
            update_convolutional_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
        } else if(l.type == DECONVOLUTIONAL){
            update_deconvolutional_layer_gpu(l, rate, net.momentum, net.decay);
        } else if(l.type == CONNECTED){
            update_connected_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
        } else if(l.type == GRU){
            update_gru_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
        } else if(l.type == RNN){
            update_rnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
        } else if(l.type == CRNN){
            update_crnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
        } else if(l.type == LOCAL){
            update_local_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
        if(l.update_gpu){
            l.update_gpu(l, update_batch, rate, net.momentum, net.decay);
        }
    }
}
@@ -271,20 +176,8 @@
{
    int update_batch = net.batch*net.subdivisions;
    float rate = get_current_rate(net);
    if(l.type == CONVOLUTIONAL){
        update_convolutional_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
    } else if(l.type == DECONVOLUTIONAL){
        update_deconvolutional_layer_gpu(l, rate, net.momentum, net.decay);
    } else if(l.type == CONNECTED){
        update_connected_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
    } else if(l.type == RNN){
        update_rnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
    } else if(l.type == GRU){
        update_gru_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
    } else if(l.type == CRNN){
        update_crnn_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
    } else if(l.type == LOCAL){
        update_local_layer_gpu(l, update_batch, rate, net.momentum, net.decay);
    if(l.update_gpu){
        l.update_gpu(l, update_batch, rate, net.momentum, net.decay);
    }
}

@@ -463,7 +356,7 @@
    }
    for(i = 0; i < n; ++i){
        pthread_join(threads[i], 0);
        printf("%f\n", errors[i]);
        //printf("%f\n", errors[i]);
        sum += errors[i];
    }
    if (get_current_batch(nets[0]) % interval == 0) {
@@ -492,6 +385,7 @@

float *network_predict_gpu(network net, float *input)
{
    cuda_set_device(net.gpu_index);
    int size = get_network_input_size(net) * net.batch;
    network_state state;
    state.index = 0;

 src/normalization_layer.c

@@ -21,7 +21,13 @@
    layer.norms = calloc(h * w * c * batch, sizeof(float));
    layer.inputs = w*h*c;
    layer.outputs = layer.inputs;

    layer.forward = forward_normalization_layer;
    layer.backward = backward_normalization_layer;
    #ifdef GPU
    layer.forward_gpu = forward_normalization_layer_gpu;
    layer.backward_gpu = backward_normalization_layer_gpu;

    layer.output_gpu =  cuda_make_array(layer.output, h * w * c * batch);
    layer.delta_gpu =   cuda_make_array(layer.delta, h * w * c * batch);
    layer.squared_gpu = cuda_make_array(layer.squared, h * w * c * batch);

 src/parser.c

@@ -12,7 +12,6 @@
#include "activation_layer.h"
#include "normalization_layer.h"
#include "batchnorm_layer.h"
#include "deconvolutional_layer.h"
#include "connected_layer.h"
#include "rnn_layer.h"
#include "gru_layer.h"
@@ -36,30 +35,42 @@
    list *options;
}section;

int is_network(section *s);
int is_convolutional(section *s);
int is_activation(section *s);
int is_local(section *s);
int is_deconvolutional(section *s);
int is_connected(section *s);
int is_rnn(section *s);
int is_gru(section *s);
int is_crnn(section *s);
int is_maxpool(section *s);
int is_reorg(section *s);
int is_avgpool(section *s);
int is_dropout(section *s);
int is_softmax(section *s);
int is_normalization(section *s);
int is_batchnorm(section *s);
int is_crop(section *s);
int is_shortcut(section *s);
int is_cost(section *s);
int is_detection(section *s);
int is_region(section *s);
int is_route(section *s);
list *read_cfg(char *filename);

LAYER_TYPE string_to_layer_type(char * type)
{

    if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
    if (strcmp(type, "[crop]")==0) return CROP;
    if (strcmp(type, "[cost]")==0) return COST;
    if (strcmp(type, "[detection]")==0) return DETECTION;
    if (strcmp(type, "[region]")==0) return REGION;
    if (strcmp(type, "[local]")==0) return LOCAL;
    if (strcmp(type, "[conv]")==0
            || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
    if (strcmp(type, "[activation]")==0) return ACTIVE;
    if (strcmp(type, "[net]")==0
            || strcmp(type, "[network]")==0) return NETWORK;
    if (strcmp(type, "[crnn]")==0) return CRNN;
    if (strcmp(type, "[gru]")==0) return GRU;
    if (strcmp(type, "[rnn]")==0) return RNN;
    if (strcmp(type, "[conn]")==0
            || strcmp(type, "[connected]")==0) return CONNECTED;
    if (strcmp(type, "[max]")==0
            || strcmp(type, "[maxpool]")==0) return MAXPOOL;
    if (strcmp(type, "[reorg]")==0) return REORG;
    if (strcmp(type, "[avg]")==0
            || strcmp(type, "[avgpool]")==0) return AVGPOOL;
    if (strcmp(type, "[dropout]")==0) return DROPOUT;
    if (strcmp(type, "[lrn]")==0
            || strcmp(type, "[normalization]")==0) return NORMALIZATION;
    if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
    if (strcmp(type, "[soft]")==0
            || strcmp(type, "[softmax]")==0) return SOFTMAX;
    if (strcmp(type, "[route]")==0) return ROUTE;
    return BLANK;
}

void free_section(section *s)
{
    free(s->type);
@@ -102,26 +113,6 @@
    int time_steps;
} size_params;

/*
 * Build a deconvolutional layer from one cfg section.
 *
 * Reads "filters", "size" and "stride" (each defaulting to 1) and
 * "activation" (defaulting to "logistic") from options, and takes the batch
 * size and input dimensions from params. If any of params.h, params.w or
 * params.c is zero, error() is called with a message saying the previous
 * layer must output an image.
 */
deconvolutional_layer parse_deconvolutional(list *options, size_params params)
{
    /* Option lookups are kept in the original order: filters, size, stride, activation. */
    int filters = option_find_int(options, "filters",1);
    int ksize = option_find_int(options, "size",1);
    int step = option_find_int(options, "stride",1);
    ACTIVATION act = get_activation(option_find_str(options, "activation", "logistic"));

    /* All three input dimensions must be nonzero. */
    if(params.h == 0 || params.w == 0 || params.c == 0){
        error("Layer before deconvolutional layer must output image.");
    }

    return make_deconvolutional_layer(params.batch, params.h, params.w, params.c,
                                      filters, ksize, step, act);
}

local_layer parse_local(list *options, size_params params)
{
    int n = option_find_int(options, "filters",1);
@@ -545,6 +536,12 @@
    net->max_batches = option_find_int(options, "max_batches", 0);
}

/* Return 1 when the section header is "[net]" or "[network]", 0 otherwise. */
int is_network(section *s)
{
    const char *header = s->type;

    if(strcmp(header, "[net]") == 0) return 1;
    return strcmp(header, "[network]") == 0;
}

network parse_network_cfg(char *filename)
{
    list *sections = read_cfg(filename);
@@ -576,47 +573,46 @@
        s = (section *)n->val;
        options = s->options;
        layer l = {0};
        if(is_convolutional(s)){
        LAYER_TYPE lt = string_to_layer_type(s->type);
        if(lt == CONVOLUTIONAL){
            l = parse_convolutional(options, params);
        }else if(is_local(s)){
        }else if(lt == LOCAL){
            l = parse_local(options, params);
        }else if(is_activation(s)){
        }else if(lt == ACTIVE){
            l = parse_activation(options, params);
        }else if(is_deconvolutional(s)){
            l = parse_deconvolutional(options, params);
        }else if(is_rnn(s)){
        }else if(lt == RNN){
            l = parse_rnn(options, params);
        }else if(is_gru(s)){
        }else if(lt == GRU){
            l = parse_gru(options, params);
        }else if(is_crnn(s)){
        }else if(lt == CRNN){
            l = parse_crnn(options, params);
        }else if(is_connected(s)){
        }else if(lt == CONNECTED){
            l = parse_connected(options, params);
        }else if(is_crop(s)){
        }else if(lt == CROP){
            l = parse_crop(options, params);
        }else if(is_cost(s)){
        }else if(lt == COST){
            l = parse_cost(options, params);
        }else if(is_region(s)){
        }else if(lt == REGION){
            l = parse_region(options, params);
        }else if(is_detection(s)){
        }else if(lt == DETECTION){
            l = parse_detection(options, params);
        }else if(is_softmax(s)){
        }else if(lt == SOFTMAX){
            l = parse_softmax(options, params);
        }else if(is_normalization(s)){
        }else if(lt == NORMALIZATION){
            l = parse_normalization(options, params);
        }else if(is_batchnorm(s)){
        }else if(lt == BATCHNORM){
            l = parse_batchnorm(options, params);
        }else if(is_maxpool(s)){
        }else if(lt == MAXPOOL){
            l = parse_maxpool(options, params);
        }else if(is_reorg(s)){
        }else if(lt == REORG){
            l = parse_reorg(options, params);
        }else if(is_avgpool(s)){
        }else if(lt == AVGPOOL){
            l = parse_avgpool(options, params);
        }else if(is_route(s)){
        }else if(lt == ROUTE){
            l = parse_route(options, params, net);
        }else if(is_shortcut(s)){
        }else if(lt == SHORTCUT){
            l = parse_shortcut(options, params, net);
        }else if(is_dropout(s)){
        }else if(lt == DROPOUT){
            l = parse_dropout(options, params);
            l.output = net.layers[count-1].output;
            l.delta = net.layers[count-1].delta;
@@ -660,142 +656,6 @@
    return net;
}

LAYER_TYPE string_to_layer_type(char * type)
{

    if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
    if (strcmp(type, "[crop]")==0) return CROP;
    if (strcmp(type, "[cost]")==0) return COST;
    if (strcmp(type, "[detection]")==0) return DETECTION;
    if (strcmp(type, "[region]")==0) return REGION;
    if (strcmp(type, "[local]")==0) return LOCAL;
    if (strcmp(type, "[deconv]")==0
            || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL;
    if (strcmp(type, "[conv]")==0
            || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
    if (strcmp(type, "[activation]")==0) return ACTIVE;
    if (strcmp(type, "[net]")==0
            || strcmp(type, "[network]")==0) return NETWORK;
    if (strcmp(type, "[crnn]")==0) return CRNN;
    if (strcmp(type, "[gru]")==0) return GRU;
    if (strcmp(type, "[rnn]")==0) return RNN;
    if (strcmp(type, "[conn]")==0
            || strcmp(type, "[connected]")==0) return CONNECTED;
    if (strcmp(type, "[max]")==0
            || strcmp(type, "[maxpool]")==0) return MAXPOOL;
    if (strcmp(type, "[reorg]")==0) return REORG;
    if (strcmp(type, "[avg]")==0
            || strcmp(type, "[avgpool]")==0) return AVGPOOL;
    if (strcmp(type, "[dropout]")==0) return DROPOUT;
    if (strcmp(type, "[lrn]")==0
            || strcmp(type, "[normalization]")==0) return NORMALIZATION;
    if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
    if (strcmp(type, "[soft]")==0
            || strcmp(type, "[softmax]")==0) return SOFTMAX;
    if (strcmp(type, "[route]")==0) return ROUTE;
    return BLANK;
}

/*
 * Section-type predicates.
 *
 * Each is_*() function below compares the section's header string (s->type,
 * square brackets included) against the literal spelling(s) of one layer
 * kind using strcmp, and returns 1 on an exact match and 0 otherwise.
 * Some kinds accept both a short and a long spelling.
 */

/* Matches "[shortcut]". */
int is_shortcut(section *s)
{
    return (strcmp(s->type, "[shortcut]")==0);
}
/* Matches "[crop]". */
int is_crop(section *s)
{
    return (strcmp(s->type, "[crop]")==0);
}
/* Matches "[cost]". */
int is_cost(section *s)
{
    return (strcmp(s->type, "[cost]")==0);
}
/* Matches "[region]". */
int is_region(section *s)
{
    return (strcmp(s->type, "[region]")==0);
}
/* Matches "[detection]". */
int is_detection(section *s)
{
    return (strcmp(s->type, "[detection]")==0);
}
/* Matches "[local]". */
int is_local(section *s)
{
    return (strcmp(s->type, "[local]")==0);
}
/* Matches "[deconv]" or "[deconvolutional]". */
int is_deconvolutional(section *s)
{
    return (strcmp(s->type, "[deconv]")==0
            || strcmp(s->type, "[deconvolutional]")==0);
}
/* Matches "[conv]" or "[convolutional]". */
int is_convolutional(section *s)
{
    return (strcmp(s->type, "[conv]")==0
            || strcmp(s->type, "[convolutional]")==0);
}
/* Matches "[activation]". */
int is_activation(section *s)
{
    return (strcmp(s->type, "[activation]")==0);
}
/* Matches "[net]" or "[network]". */
int is_network(section *s)
{
    return (strcmp(s->type, "[net]")==0
            || strcmp(s->type, "[network]")==0);
}
/* Matches "[crnn]". */
int is_crnn(section *s)
{
    return (strcmp(s->type, "[crnn]")==0);
}
/* Matches "[gru]". */
int is_gru(section *s)
{
    return (strcmp(s->type, "[gru]")==0);
}
/* Matches "[rnn]". */
int is_rnn(section *s)
{
    return (strcmp(s->type, "[rnn]")==0);
}
/* Matches "[conn]" or "[connected]". */
int is_connected(section *s)
{
    return (strcmp(s->type, "[conn]")==0
            || strcmp(s->type, "[connected]")==0);
}
/* Matches "[reorg]". */
int is_reorg(section *s)
{
    return (strcmp(s->type, "[reorg]")==0);
}
/* Matches "[max]" or "[maxpool]". */
int is_maxpool(section *s)
{
    return (strcmp(s->type, "[max]")==0
            || strcmp(s->type, "[maxpool]")==0);
}
/* Matches "[avg]" or "[avgpool]". */
int is_avgpool(section *s)
{
    return (strcmp(s->type, "[avg]")==0
            || strcmp(s->type, "[avgpool]")==0);
}
/* Matches "[dropout]". */
int is_dropout(section *s)
{
    return (strcmp(s->type, "[dropout]")==0);
}

/* Matches "[lrn]" or "[normalization]". */
int is_normalization(section *s)
{
    return (strcmp(s->type, "[lrn]")==0
            || strcmp(s->type, "[normalization]")==0);
}

/* Matches "[batchnorm]". */
int is_batchnorm(section *s)
{
    return (strcmp(s->type, "[batchnorm]")==0);
}

/* Matches "[soft]" or "[softmax]". */
int is_softmax(section *s)
{
    return (strcmp(s->type, "[soft]")==0
            || strcmp(s->type, "[softmax]")==0);
}
/* Matches "[route]". */
int is_route(section *s)
{
    return (strcmp(s->type, "[route]")==0);
}

list *read_cfg(char *filename)
{
    FILE *file = fopen(filename, "r");
@@ -831,45 +691,6 @@
    return sections;
}

/*
 * Write a "doubled" copy of the network's convolutional weights to filename.
 *
 * File layout, all values raw in host byte order:
 *   - header: learning_rate, momentum, decay (float each), then *net.seen (int)
 *   - for every CONVOLUTIONAL layer, in network order:
 *       1. the l.n biases, written twice back to back
 *       2. for each of the l.n filters: the filter's l.c*l.size*l.size
 *          weights followed by the same number of zeros
 *       3. for each of the l.n filters: l.c*l.size*l.size zeros followed by
 *          the filter's weights
 * Layers of any other type are skipped.
 *
 * NOTE(review): the layout looks intended for a network with twice as many
 * filters and input channels per convolutional layer; confirm against the
 * loader that consumes this file.
 *
 * When built with GPU and gpu_index >= 0, each convolutional layer is pulled
 * with pull_convolutional_layer() before it is written.
 *
 * A failed fopen is reported through file_error(filename).
 */
void save_weights_double(network net, char *filename)
{
    fprintf(stderr, "Saving doubled weights to %s\n", filename);
    /* "wb", not "w": the payload is raw floats/ints, and a text-mode stream
     * may translate newline bytes on some platforms and corrupt the file. */
    FILE *fp = fopen(filename, "wb");
    if(!fp) file_error(filename);

    fwrite(&net.learning_rate, sizeof(float), 1, fp);
    fwrite(&net.momentum, sizeof(float), 1, fp);
    fwrite(&net.decay, sizeof(float), 1, fp);
    fwrite(net.seen, sizeof(int), 1, fp);

    int i,j,k;
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.type != CONVOLUTIONAL) continue;
#ifdef GPU
        if(gpu_index >= 0){
            pull_convolutional_layer(l);
        }
#endif
        float zero = 0;
        int filter_size = l.c*l.size*l.size;   /* weights per filter */

        /* Biases are emitted twice: once for each half of the doubled layer. */
        fwrite(l.biases, sizeof(float), l.n, fp);
        fwrite(l.biases, sizeof(float), l.n, fp);

        /* First half: [weights | zeros] per filter. */
        for (j = 0; j < l.n; ++j){
            fwrite(l.weights + j*filter_size, sizeof(float), filter_size, fp);
            for (k = 0; k < filter_size; ++k) fwrite(&zero, sizeof(float), 1, fp);
        }
        /* Second half: [zeros | weights] per filter. */
        for (j = 0; j < l.n; ++j){
            for (k = 0; k < filter_size; ++k) fwrite(&zero, sizeof(float), 1, fp);
            fwrite(l.weights + j*filter_size, sizeof(float), filter_size, fp);
        }
    }
    fclose(fp);
}

void save_convolutional_weights_binary(layer l, FILE *fp)
{
#ifdef GPU
@@ -1147,16 +968,6 @@
        if(l.type == CONVOLUTIONAL){
            load_convolutional_weights(l, fp);
        }
        if(l.type == DECONVOLUTIONAL){
            int num = l.n*l.c*l.size*l.size;
            fread(l.biases, sizeof(float), l.n, fp);
            fread(l.weights, sizeof(float), num, fp);
#ifdef GPU
            if(gpu_index >= 0){
                push_deconvolutional_layer(l);
            }
#endif
        }
        if(l.type == CONNECTED){
            load_connected_weights(l, fp, transpose);
        }

 src/region_layer.c

@@ -34,7 +34,11 @@
        l.biases[i] = .5;
    }

    l.forward = forward_region_layer;
    l.backward = backward_region_layer;
#ifdef GPU
    l.forward_gpu = forward_region_layer_gpu;
    l.backward_gpu = backward_region_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
#endif
@@ -228,6 +232,45 @@
    axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
}

/*
 * Decode the region layer's raw output (l.output) into boxes and per-class
 * probabilities.
 *
 * The output is read as l.w*l.h cells with l.n predictions each. Every
 * prediction occupies (l.classes + 5) consecutive floats: offsets 0..3 are
 * the box terms, offset 4 is a scale factor applied to the class scores,
 * and offsets 5.. are the l.classes class scores.
 *
 *   w, h             - multipliers applied to the decoded x/width and
 *                      y/height terms respectively
 *   thresh           - class probabilities not strictly above this are
 *                      stored as 0
 *   probs            - out: probs[index][j] for prediction `index`, class j
 *   boxes            - out: boxes[index] for prediction `index`
 *   only_objectness  - when nonzero, probs[index][0] is overwritten with the
 *                      raw scale value (not thresholded)
 *
 * `index` is cell*l.n + n, so both output arrays need l.w*l.h*l.n entries.
 */
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
{
    int i,j,n;
    float *predictions = l.output;
    //int per_cell = 5*num+classes;
    for (i = 0; i < l.w*l.h; ++i){
        // Grid position of this cell.
        int row = i / l.w;
        int col = i % l.w;
        for(n = 0; n < l.n; ++n){
            int index = i*l.n + n;
            int p_index = index * (l.classes + 5) + 4;
            float scale = predictions[p_index];
            int box_index = index * (l.classes + 5);
            // First decoding of x/y; overwritten by the if(1) block below.
            boxes[index].x = (predictions[box_index + 0] + col + .5) / l.w * w;
            boxes[index].y = (predictions[box_index + 1] + row + .5) / l.h * h;
            // Disabled alternative decoding (never executed).
            if(0){
                boxes[index].x = (logistic_activate(predictions[box_index + 0]) + col) / l.w * w;
                boxes[index].y = (logistic_activate(predictions[box_index + 1]) + row) / l.h * h;
            }
            // First decoding of w/h; also overwritten by the if(1) block below.
            boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (l.sqrt?2:1)) * w;
            boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (l.sqrt?2:1)) * h;
            // Active decoding: always runs and replaces all four fields, so
            // these are the values callers actually receive.
            if(1){
                boxes[index].x = ((col + .5)/l.w + predictions[box_index + 0] * .5) * w;
                boxes[index].y = ((row + .5)/l.h + predictions[box_index + 1] * .5) * h;
                boxes[index].w = (exp(predictions[box_index + 2]) * .5) * w;
                boxes[index].h = (exp(predictions[box_index + 3]) * .5) * h;
            }
            // Class probability = scale * class score, zeroed unless > thresh.
            for(j = 0; j < l.classes; ++j){
                int class_index = index * (l.classes + 5) + 5;
                float prob = scale*predictions[class_index+j];
                probs[index][j] = (prob > thresh) ? prob : 0;
            }
            if(only_objectness){
                probs[index][0] = scale;
            }
        }
    }
}

#ifdef GPU

void forward_region_layer_gpu(const region_layer l, network_state state)

 src/region_layer.h

@@ -9,6 +9,7 @@
region_layer make_region_layer(int batch, int h, int w, int n, int classes, int coords);
void forward_region_layer(const region_layer l, network_state state);
void backward_region_layer(const region_layer l, network_state state);
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);

#ifdef GPU
void forward_region_layer_gpu(const region_layer l, network_state state);

 src/reorg_layer.c

@@ -22,7 +22,13 @@
    int output_size = l.out_h * l.out_w * l.out_c * batch;
    l.output =  calloc(output_size, sizeof(float));
    l.delta =   calloc(output_size, sizeof(float));

    l.forward = forward_reorg_layer;
    l.backward = backward_reorg_layer;
    #ifdef GPU
    l.forward_gpu = forward_reorg_layer_gpu;
    l.backward_gpu = backward_reorg_layer_gpu;

    l.output_gpu  = cuda_make_array(l.output, output_size);
    l.delta_gpu   = cuda_make_array(l.delta, output_size);
    #endif

 src/rnn_layer.c

@@ -58,7 +58,13 @@
    l.output = l.output_layer->output;
    l.delta = l.output_layer->delta;

    l.forward = forward_rnn_layer;
    l.backward = backward_rnn_layer;
    l.update = update_rnn_layer;
#ifdef GPU
    l.forward_gpu = forward_rnn_layer_gpu;
    l.backward_gpu = backward_rnn_layer_gpu;
    l.update_gpu = update_rnn_layer_gpu;
    l.state_gpu = cuda_make_array(l.state, batch*hidden*(steps+1));
    l.output_gpu = l.output_layer->output_gpu;
    l.delta_gpu = l.output_layer->delta_gpu;

 src/rnn_layer.h

@@ -1,23 +1,24 @@

#ifndef GRU_LAYER_H
#define GRU_LAYER_H
#ifndef RNN_LAYER_H
#define RNN_LAYER_H

#include "activations.h"
#include "layer.h"
#include "network.h"
#define USET

layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);
layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log);

void forward_gru_layer(layer l, network_state state);
void backward_gru_layer(layer l, network_state state);
void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay);
void forward_rnn_layer(layer l, network_state state);
void backward_rnn_layer(layer l, network_state state);
void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay);

#ifdef GPU
void forward_gru_layer_gpu(layer l, network_state state);
void backward_gru_layer_gpu(layer l, network_state state);
void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void push_gru_layer(layer l);
void pull_gru_layer(layer l);
void forward_rnn_layer_gpu(layer l, network_state state);
void backward_rnn_layer_gpu(layer l, network_state state);
void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
void push_rnn_layer(layer l);
void pull_rnn_layer(layer l);
#endif

#endif

 src/rnn_vid.c

@@ -6,6 +6,8 @@

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
image get_image_from_stream(CvCapture *cap);
image ipl_to_image(IplImage* src);

void reconstruct_picture(network net, float *features, image recon, image update, float rate, float momentum, float lambda, int smooth_size, int iters);


 src/route_layer.c

@@ -23,20 +23,26 @@
    l.inputs = outputs;
    l.delta =  calloc(outputs*batch, sizeof(float));
    l.output = calloc(outputs*batch, sizeof(float));;

    l.forward = forward_route_layer;
    l.backward = backward_route_layer;
    #ifdef GPU
    l.forward_gpu = forward_route_layer_gpu;
    l.backward_gpu = backward_route_layer_gpu;

    l.delta_gpu =  cuda_make_array(l.delta, outputs*batch);
    l.output_gpu = cuda_make_array(l.output, outputs*batch);
    #endif
    return l;
}

void forward_route_layer(const route_layer l, network net)
void forward_route_layer(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];
        float *input = net.layers[index].output;
        float *input = state.net.layers[index].output;
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
            copy_cpu(input_size, input + j*input_size, 1, l.output + offset + j*l.outputs, 1);
@@ -45,13 +51,13 @@
    }
}

void backward_route_layer(const route_layer l, network net)
void backward_route_layer(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];
        float *delta = net.layers[index].delta;
        float *delta = state.net.layers[index].delta;
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
            axpy_cpu(input_size, 1, l.delta + offset + j*l.outputs, 1, delta + j*input_size, 1);
@@ -61,13 +67,13 @@
}

#ifdef GPU
void forward_route_layer_gpu(const route_layer l, network net)
void forward_route_layer_gpu(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];
        float *input = net.layers[index].output_gpu;
        float *input = state.net.layers[index].output_gpu;
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
            copy_ongpu(input_size, input + j*input_size, 1, l.output_gpu + offset + j*l.outputs, 1);
@@ -76,13 +82,13 @@
    }
}

void backward_route_layer_gpu(const route_layer l, network net)
void backward_route_layer_gpu(const route_layer l, network_state state)
{
    int i, j;
    int offset = 0;
    for(i = 0; i < l.n; ++i){
        int index = l.input_layers[i];
        float *delta = net.layers[index].delta_gpu;
        float *delta = state.net.layers[index].delta_gpu;
        int input_size = l.input_sizes[i];
        for(j = 0; j < l.batch; ++j){
            axpy_ongpu(input_size, 1, l.delta_gpu + offset + j*l.outputs, 1, delta + j*input_size, 1);

 src/route_layer.h

@@ -6,12 +6,12 @@
typedef layer route_layer;

route_layer make_route_layer(int batch, int n, int *input_layers, int *input_size);
void forward_route_layer(const route_layer l, network net);
void backward_route_layer(const route_layer l, network net);
void forward_route_layer(const route_layer l, network_state state);
void backward_route_layer(const route_layer l, network_state state);

#ifdef GPU
void forward_route_layer_gpu(const route_layer l, network net);
void backward_route_layer_gpu(const route_layer l, network net);
void forward_route_layer_gpu(const route_layer l, network_state state);
void backward_route_layer_gpu(const route_layer l, network_state state);
#endif

#endif

 src/shortcut_layer.c

@@ -23,7 +23,13 @@

    l.delta =  calloc(l.outputs*batch, sizeof(float));
    l.output = calloc(l.outputs*batch, sizeof(float));;

    l.forward = forward_shortcut_layer;
    l.backward = backward_shortcut_layer;
    #ifdef GPU
    l.forward_gpu = forward_shortcut_layer_gpu;
    l.backward_gpu = backward_shortcut_layer_gpu;

    l.delta_gpu =  cuda_make_array(l.delta, l.outputs*batch);
    l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
    #endif

 src/softmax_layer.c

@@ -19,7 +19,13 @@
    l.outputs = inputs;
    l.output = calloc(inputs*batch, sizeof(float));
    l.delta = calloc(inputs*batch, sizeof(float));

    l.forward = forward_softmax_layer;
    l.backward = backward_softmax_layer;
    #ifdef GPU
    l.forward_gpu = forward_softmax_layer_gpu;
    l.backward_gpu = backward_softmax_layer_gpu;

    l.output_gpu = cuda_make_array(l.output, inputs*batch); 
    l.delta_gpu = cuda_make_array(l.delta, inputs*batch); 
    #endif

 src/utils.c

@@ -135,23 +135,20 @@
    printf("\n");
}

char *find_replace(char *str, char *orig, char *rep)
void find_replace(char *str, char *orig, char *rep, char *output)
{
    static char buffer[4096];
    static char buffer2[4096];
    static char buffer3[4096];
    char buffer[4096] = {0};
    char *p;

    if(!(p = strstr(str, orig)))  // Is 'orig' even in 'str'?
        return str;
    sprintf(buffer, "%s", str);
    if(!(p = strstr(buffer, orig))){  // Is 'orig' even in 'str'?
        sprintf(output, "%s", str);
        return;
    }

    strncpy(buffer2, str, p-str); // Copy characters from 'str' start to 'orig' st$
    buffer2[p-str] = '\0';
    *p = '\0';

    sprintf(buffer3, "%s%s%s", buffer2, rep, p+strlen(orig));
    sprintf(buffer, "%s", buffer3);

    return buffer;
    sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig));
}

float sec(clock_t clocks)

 src/utils.h

@@ -19,7 +19,7 @@
void write_all(int fd, char *buffer, size_t bytes);
int read_all_fail(int fd, char *buffer, size_t bytes);
int write_all_fail(int fd, char *buffer, size_t bytes);
char *find_replace(char *str, char *orig, char *rep);
void find_replace(char *str, char *orig, char *rep, char *output);
void error(const char *s);
void malloc_error();
void file_error(char *s);

 src/voxel.c

@@ -5,6 +5,7 @@

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
image get_image_from_stream(CvCapture *cap);
#endif

void extract_voxel(char *lfile, char *rfile, char *prefix)

 src/xnor_layer.c

File was deleted

 src/xnor_layer.h

File was deleted

 src/yolo.c

@@ -11,7 +11,6 @@
#endif

char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
image voc_labels[20];

void train_yolo(char *cfgfile, char *weightfile)
{
@@ -88,34 +87,6 @@
    save_weights(net, buff);
}

/*
 * Decode a flat prediction vector into boxes and per-class probabilities.
 *
 * predictions is read as three consecutive regions over side*side cells:
 *   1. class scores:     classes floats per cell
 *   2. box confidences:  num floats per cell
 *   3. box coordinates:  4 floats (x, y, w, h) per box
 *
 * For box `slot` (= cell*num + n):
 *   - x and y are offset by the cell's column/row, divided by side and
 *     multiplied by w / h;
 *   - width and height are squared first when `square` is nonzero, then
 *     multiplied by w / h;
 *   - probs[slot][j] is confidence * class score, or 0 when that product is
 *     not strictly greater than thresh;
 *   - when only_objectness is nonzero, probs[slot][0] is overwritten with
 *     the raw confidence.
 */
void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
{
    const int cells = side*side;
    const int exponent = square ? 2 : 1;
    int cell, b, c;

    for (cell = 0; cell < cells; ++cell){
        const int row = cell / side;
        const int col = cell % side;

        for (b = 0; b < num; ++b){
            const int slot = cell*num + b;
            const int conf_at = cells*classes + slot;
            const int coord_at = cells*(classes + num) + slot*4;
            const int class_at = cell*classes;
            const float conf = predictions[conf_at];

            boxes[slot].x = (predictions[coord_at + 0] + col) / side * w;
            boxes[slot].y = (predictions[coord_at + 1] + row) / side * h;
            boxes[slot].w = pow(predictions[coord_at + 2], exponent) * w;
            boxes[slot].h = pow(predictions[coord_at + 3], exponent) * h;

            for (c = 0; c < classes; ++c){
                float p = conf*predictions[class_at + c];
                if (p > thresh){
                    probs[slot][c] = p;
                } else {
                    probs[slot][c] = 0;
                }
            }

            if (only_objectness){
                probs[slot][0] = conf;
            }
        }
    }
}

void print_yolo_detections(FILE **fps, char *id, box *boxes, float **probs, int total, int classes, int w, int h)
{
    int i, j;
@@ -155,8 +126,6 @@

    layer l = net.layers[net.n-1];
    int classes = l.classes;
    int square = l.sqrt;
    int side = l.side;

    int j;
    FILE **fps = calloc(classes, sizeof(FILE *));
@@ -165,9 +134,9 @@
        snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
        fps[j] = fopen(buff, "w");
    }
    box *boxes = calloc(side*side*l.n, sizeof(box));
    float **probs = calloc(side*side*l.n, sizeof(float *));
    for(j = 0; j < side*side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));
    box *boxes = calloc(l.side*l.side*l.n, sizeof(box));
    float **probs = calloc(l.side*l.side*l.n, sizeof(float *));
    for(j = 0; j < l.side*l.side*l.n; ++j) probs[j] = calloc(classes, sizeof(float *));

    int m = plist->size;
    int i=0;
@@ -213,12 +182,12 @@
            char *path = paths[i+t-nthreads];
            char *id = basecfg(path);
            float *X = val_resized[t].data;
            float *predictions = network_predict(net, X);
            network_predict(net, X);
            int w = val[t].w;
            int h = val[t].h;
            convert_detections(predictions, classes, l.n, square, side, w, h, thresh, probs, boxes, 0);
            if (nms) do_nms_sort(boxes, probs, side*side*l.n, classes, iou_thresh);
            print_yolo_detections(fps, id, boxes, probs, side*side*l.n, classes, w, h);
            get_detection_boxes(l, w, h, thresh, probs, boxes, 0);
            if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, classes, iou_thresh);
            print_yolo_detections(fps, id, boxes, probs, l.side*l.side*l.n, classes, w, h);
            free(id);
            free_image(val[t]);
            free_image(val_resized[t]);
@@ -243,7 +212,6 @@

    layer l = net.layers[net.n-1];
    int classes = l.classes;
    int square = l.sqrt;
    int side = l.side;

    int j, k;
@@ -274,14 +242,15 @@
        image orig = load_image_color(path, 0, 0);
        image sized = resize_image(orig, net.w, net.h);
        char *id = basecfg(path);
        float *predictions = network_predict(net, sized.data);
        convert_detections(predictions, classes, l.n, square, side, 1, 1, thresh, probs, boxes, 1);
        network_predict(net, sized.data);
        get_detection_boxes(l, orig.w, orig.h, thresh, probs, boxes, 1);
        if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms);

        char *labelpath = find_replace(path, "images", "labels");
        labelpath = find_replace(labelpath, "JPEGImages", "labels");
        labelpath = find_replace(labelpath, ".jpg", ".txt");
        labelpath = find_replace(labelpath, ".JPEG", ".txt");
        char labelpath[4096];
        find_replace(path, "images", "labels", labelpath);
        find_replace(labelpath, "JPEGImages", "labels", labelpath);
        find_replace(labelpath, ".jpg", ".txt", labelpath);
        find_replace(labelpath, ".JPEG", ".txt", labelpath);

        int num_labels = 0;
        box_label *truth = read_boxes(labelpath, &num_labels);
@@ -315,7 +284,7 @@

void test_yolo(char *cfgfile, char *weightfile, char *filename, float thresh)
{

    image *alphabet = load_alphabet();
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
@@ -345,12 +314,12 @@
        image sized = resize_image(im, net.w, net.h);
        float *X = sized.data;
        time=clock();
        float *predictions = network_predict(net, X);
        network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
        get_detection_boxes(l, 1, 1, thresh, probs, boxes, 1);
        if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
        //draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
        draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, voc_labels, 20);
        //draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20);
        draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, voc_names, alphabet, 20);
        save_image(im, "predictions");
        show_image(im, "predictions");

@@ -366,13 +335,7 @@

void run_yolo(int argc, char **argv)
{
    int i;
    for(i = 0; i < 20; ++i){
        char buff[256];
        sprintf(buff, "data/labels/%s.png", voc_names[i]);
        voc_labels[i] = load_image_color(buff, 0, 0);
    }

    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    float thresh = find_float_arg(argc, argv, "-thresh", .2);
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
@@ -388,5 +351,5 @@
    else if(0==strcmp(argv[2], "train")) train_yolo(cfg, weights);
    else if(0==strcmp(argv[2], "valid")) validate_yolo(cfg, weights);
    else if(0==strcmp(argv[2], "recall")) validate_yolo_recall(cfg, weights);
    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, voc_labels, 20, frame_skip);
    else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix);
}

			@@ -41,10 +41,10 @@
			LDFLAGS+= -lcudnn
			endif

			OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
			OBJ=gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
			ifeq ($(GPU), 1)
			LDFLAGS+= -lstdc++
			OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
			OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
			endif

			OBJS = $(addprefix $(OBJDIR), $(OBJ))

			@@ -1,6 +1,19 @@
			import os
			import string
			import pipes

			l = ["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
			#l = ["person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush", "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]

			l = string.printable

			for word in l:
			os.system("convert -fill black -background white -bordercolor white -border 4 -font futura-normal -pointsize 18 label:\"%s\" \"%s.png\""%(word, word))
			#os.system("convert -fill black -background white -bordercolor white -border 4 -font futura-normal -pointsize 18 label:\"%s\" \"%s.png\""%(word, word))
			if word == ' ':
			os.system('convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:"\ " 32.png')
			elif word == '\\':
			os.system('convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:"\\\\\\\\" 92.png')
			elif ord(word) in [9,10,11,12,13,14]:
			pass
			else:
			os.system("convert -fill black -background white -bordercolor white -font futura-normal -pointsize 64 label:%s \"%d.png\""%(pipes.quote(word), ord(word)))

			@@ -21,7 +21,12 @@
			l.output = calloc(batchinputs, sizeof(float));
			l.delta = calloc(batchinputs, sizeof(float));

			l.forward = forward_activation_layer;
			l.backward = backward_activation_layer;
			#ifdef GPU
			l.forward_gpu = forward_activation_layer_gpu;
			l.backward_gpu = backward_activation_layer_gpu;

			l.output_gpu = cuda_make_array(l.output, inputs*batch);
			l.delta_gpu = cuda_make_array(l.delta, inputs*batch);
			#endif

			@@ -8,6 +8,7 @@

			#ifdef OPENCV
			#include "opencv2/highgui/highgui_c.h"
			image get_image_from_stream(CvCapture *cap);
			#endif

			@@ -19,7 +19,11 @@
			int output_size = l.outputs * batch;
			l.output = calloc(output_size, sizeof(float));
			l.delta = calloc(output_size, sizeof(float));
			l.forward = forward_avgpool_layer;
			l.backward = backward_avgpool_layer;
			#ifdef GPU
			l.forward_gpu = forward_avgpool_layer_gpu;
			l.backward_gpu = backward_avgpool_layer_gpu;
			l.output_gpu = cuda_make_array(l.output, output_size);
			l.delta_gpu = cuda_make_array(l.delta, output_size);
			#endif

			@@ -28,7 +28,13 @@

			layer.rolling_mean = calloc(c, sizeof(float));
			layer.rolling_variance = calloc(c, sizeof(float));

			layer.forward = forward_batchnorm_layer;
			layer.backward = backward_batchnorm_layer;
			#ifdef GPU
			layer.forward_gpu = forward_batchnorm_layer_gpu;
			layer.backward_gpu = backward_batchnorm_layer_gpu;

			layer.output_gpu = cuda_make_array(layer.output, h * w * c * batch);
			layer.delta_gpu = cuda_make_array(layer.delta, h * w * c * batch);

			@@ -10,6 +10,7 @@

			#ifdef OPENCV
			#include "opencv2/highgui/highgui_c.h"
			image get_image_from_stream(CvCapture *cap);
			#endif

			list read_data_cfg(char filename)
			@@ -57,25 +58,26 @@
			#ifdef GPU
			int i;

			srand(time(0));
			float avg_loss = -1;
			char *base = basecfg(cfgfile);
			printf("%s\n", base);
			printf("%d\n", ngpus);
			network *nets = calloc(ngpus, sizeof(network));

			srand(time(0));
			int seed = rand();
			for(i = 0; i < ngpus; ++i){
			srand(seed);
			cuda_set_device(gpus[i]);
			nets[i] = parse_network_cfg(cfgfile);
			if(clear) *nets[i].seen = 0;
			if(weightfile){
			load_weights(&nets[i], weightfile);
			}
			}
			network net = nets[0];
			for(i = 0; i < ngpus; ++i){
			nets[i].seen = net.seen;
			if(clear) *nets[i].seen = 0;
			nets[i].learning_rate *= ngpus;
			}
			srand(time(0));
			network net = nets[0];

			int imgs = net.batch * net.subdivisions * ngpus;

			@@ -12,14 +12,10 @@
			#include "opencv2/highgui/highgui_c.h"
			#endif

			void convert_detections(float predictions, int classes, int num, int square, int side, int w, int h, float thresh, float probs, box boxes, int only_objectness);

			char *coco_classes[] = {"person","bicycle","car","motorcycle","airplane","bus","train","truck","boat","traffic light","fire hydrant","stop sign","parking meter","bench","bird","cat","dog","horse","sheep","cow","elephant","bear","zebra","giraffe","backpack","umbrella","handbag","tie","suitcase","frisbee","skis","snowboard","sports ball","kite","baseball bat","baseball glove","skateboard","surfboard","tennis racket","bottle","wine glass","cup","fork","knife","spoon","bowl","banana","apple","sandwich","orange","broccoli","carrot","hot dog","pizza","donut","cake","chair","couch","potted plant","bed","dining table","toilet","tv","laptop","mouse","remote","keyboard","cell phone","microwave","oven","toaster","sink","refrigerator","book","clock","vase","scissors","teddy bear","hair drier","toothbrush"};

			int coco_ids[] = {1,2,3,4,5,6,7,8,9,10,11,13,14,15,16,17,18,19,20,21,22,23,24,25,27,28,31,32,33,34,35,36,37,38,39,40,41,42,43,44,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,67,70,72,73,74,75,76,77,78,79,80,81,82,84,85,86,87,88,89,90};

			image coco_labels[80];

			void train_coco(char cfgfile, char weightfile)
			{
			//char *train_images = "/home/pjreddie/data/voc/test/train.txt";
			@@ -160,7 +156,6 @@

			layer l = net.layers[net.n-1];
			int classes = l.classes;
			int square = l.sqrt;
			int side = l.side;

			int j;
			@@ -217,10 +212,10 @@
			char *path = paths[i+t-nthreads];
			int image_id = get_coco_image_id(path);
			float *X = val_resized[t].data;
			float *predictions = network_predict(net, X);
			network_predict(net, X);
			int w = val[t].w;
			int h = val[t].h;
			convert_detections(predictions, classes, l.n, square, side, w, h, thresh, probs, boxes, 0);
			get_detection_boxes(l, w, h, thresh, probs, boxes, 0);
			if (nms) do_nms_sort(boxes, probs, sidesidel.n, classes, iou_thresh);
			print_cocos(fp, image_id, boxes, probs, sidesidel.n, classes, w, h);
			free_image(val[t]);
			@@ -250,7 +245,6 @@

			layer l = net.layers[net.n-1];
			int classes = l.classes;
			int square = l.sqrt;
			int side = l.side;

			int j, k;
			@@ -282,14 +276,15 @@
			image orig = load_image_color(path, 0, 0);
			image sized = resize_image(orig, net.w, net.h);
			char *id = basecfg(path);
			float *predictions = network_predict(net, sized.data);
			convert_detections(predictions, classes, l.n, square, side, 1, 1, thresh, probs, boxes, 1);
			network_predict(net, sized.data);
			get_detection_boxes(l, 1, 1, thresh, probs, boxes, 1);
			if (nms) do_nms(boxes, probs, sidesidel.n, 1, nms_thresh);

			char *labelpath = find_replace(path, "images", "labels");
			labelpath = find_replace(labelpath, "JPEGImages", "labels");
			labelpath = find_replace(labelpath, ".jpg", ".txt");
			labelpath = find_replace(labelpath, ".JPEG", ".txt");
			char labelpath[4096];
			find_replace(path, "images", "labels", labelpath);
			find_replace(labelpath, "JPEGImages", "labels", labelpath);
			find_replace(labelpath, ".jpg", ".txt", labelpath);
			find_replace(labelpath, ".JPEG", ".txt", labelpath);

			int num_labels = 0;
			box_label *truth = read_boxes(labelpath, &num_labels);
			@@ -323,7 +318,7 @@

			void test_coco(char cfgfile, char weightfile, char *filename, float thresh)
			{

			image *alphabet = load_alphabet();
			network net = parse_network_cfg(cfgfile);
			if(weightfile){
			load_weights(&net, weightfile);
			@@ -353,11 +348,11 @@
			image sized = resize_image(im, net.w, net.h);
			float *X = sized.data;
			time=clock();
			float *predictions = network_predict(net, X);
			network_predict(net, X);
			printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
			convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
			get_detection_boxes(l, 1, 1, thresh, probs, boxes, 0);
			if (nms) do_nms_sort(boxes, probs, l.sidel.sidel.n, l.classes, nms);
			draw_detections(im, l.sidel.sidel.n, thresh, boxes, probs, coco_classes, coco_labels, 80);
			draw_detections(im, l.sidel.sidel.n, thresh, boxes, probs, coco_classes, alphabet, 80);
			save_image(im, "prediction");
			show_image(im, "predictions");
			free_image(im);
			@@ -372,12 +367,7 @@

			void run_coco(int argc, char **argv)
			{
			int i;
			for(i = 0; i < 80; ++i){
			char buff[256];
			sprintf(buff, "data/labels/%s.png", coco_classes[i]);
			coco_labels[i] = load_image_color(buff, 0, 0);
			}
			char *prefix = find_char_arg(argc, argv, "-prefix", 0);
			float thresh = find_float_arg(argc, argv, "-thresh", .2);
			int cam_index = find_int_arg(argc, argv, "-c", 0);
			int frame_skip = find_int_arg(argc, argv, "-s", 0);
			@@ -394,5 +384,5 @@
			else if(0==strcmp(argv[2], "train")) train_coco(cfg, weights);
			else if(0==strcmp(argv[2], "valid")) validate_coco(cfg, weights);
			else if(0==strcmp(argv[2], "recall")) validate_coco_recall(cfg, weights);
			else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, coco_labels, 80, frame_skip);
			else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, coco_classes, 80, frame_skip, prefix);
			}

			@@ -36,6 +36,10 @@
			l.weights = calloc(outputs*inputs, sizeof(float));
			l.biases = calloc(outputs, sizeof(float));

			l.forward = forward_connected_layer;
			l.backward = backward_connected_layer;
			l.update = update_connected_layer;

			//float scale = 1./sqrt(inputs);
			float scale = sqrt(2./inputs);
			for(i = 0; i < outputs*inputs; ++i){
			@@ -66,6 +70,10 @@
			}

			#ifdef GPU
			l.forward_gpu = forward_connected_layer_gpu;
			l.backward_gpu = backward_connected_layer_gpu;
			l.update_gpu = update_connected_layer_gpu;

			l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
			l.biases_gpu = cuda_make_array(l.biases, outputs);

			@@ -209,6 +209,9 @@
			l.output = calloc(l.batchout_h out_w * n, sizeof(float));
			l.delta = calloc(l.batchout_h out_w * n, sizeof(float));

			l.forward = forward_convolutional_layer;
			l.backward = backward_convolutional_layer;
			l.update = update_convolutional_layer;
			if(binary){
			l.binary_weights = calloc(cnsize*size, sizeof(float));
			l.cweights = calloc(cnsize*size, sizeof(char));
			@@ -234,6 +237,10 @@
			}

			#ifdef GPU
			l.forward_gpu = forward_convolutional_layer_gpu;
			l.backward_gpu = backward_convolutional_layer_gpu;
			l.update_gpu = update_convolutional_layer_gpu;

			if(gpu_index >= 0){
			l.weights_gpu = cuda_make_array(l.weights, cnsize*size);
			l.weight_updates_gpu = cuda_make_array(l.weight_updates, cnsize*size);

			@@ -43,7 +43,13 @@
			l.delta = calloc(inputs*batch, sizeof(float));
			l.output = calloc(inputs*batch, sizeof(float));
			l.cost = calloc(1, sizeof(float));

			l.forward = forward_cost_layer;
			l.backward = backward_cost_layer;
			#ifdef GPU
			l.forward_gpu = forward_cost_layer_gpu;
			l.backward_gpu = backward_cost_layer_gpu;

			l.delta_gpu = cuda_make_array(l.output, inputs*batch);
			l.output_gpu = cuda_make_array(l.delta, inputs*batch);
			#endif

			@@ -64,7 +64,15 @@
			l.output = l.output_layer->output;
			l.delta = l.output_layer->delta;

			l.forward = forward_crnn_layer;
			l.backward = backward_crnn_layer;
			l.update = update_crnn_layer;

			#ifdef GPU
			l.forward_gpu = forward_crnn_layer_gpu;
			l.backward_gpu = backward_crnn_layer_gpu;
			l.update_gpu = update_crnn_layer_gpu;

			l.state_gpu = cuda_make_array(l.state, l.hiddenbatch(steps+1));
			l.output_gpu = l.output_layer->output_gpu;
			l.delta_gpu = l.output_layer->delta_gpu;

			@@ -10,6 +10,9 @@
			return float_to_image(w,h,c,l.output);
			}

			/* Backward pass of the crop layer: the body is empty, so calling it does nothing. */
			void backward_crop_layer(const crop_layer l, network_state state){}
			/* GPU-named variant of the backward pass; its body is empty as well. */
			void backward_crop_layer_gpu(const crop_layer l, network_state state){}

			crop_layer make_crop_layer(int batch, int h, int w, int c, int crop_height, int crop_width, int flip, float angle, float saturation, float exposure)
			{
			fprintf(stderr, "Crop Layer: %d x %d -> %d x %d x %d image\n", h,w,crop_height,crop_width,c);
			@@ -30,7 +33,12 @@
			l.inputs = l.w * l.h * l.c;
			l.outputs = l.out_w * l.out_h * l.out_c;
			l.output = calloc(l.outputs*batch, sizeof(float));
			l.forward = forward_crop_layer;
			l.backward = backward_crop_layer;

			#ifdef GPU
			l.forward_gpu = forward_crop_layer_gpu;
			l.backward_gpu = backward_crop_layer_gpu;
			l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
			l.rand_gpu = cuda_make_array(0, l.batch*8);
			#endif

			@@ -136,17 +136,6 @@
			save_weights_upto(net, outfile, max);
			}

			/*
			 * Parse a network from `cfgfile`, optionally load `weightfile` into it,
			 * and write it out through save_weights_double() to `outfile`.
			 *
			 * cfgfile     path handed to parse_network_cfg()
			 * weightfile  path handed to load_weights(); skipped when NULL
			 * outfile     path handed to save_weights_double()
			 *
			 * The first two parameters are restored to `char *`: the rendered page
			 * had dropped their `*`, yet the body null-tests `weightfile` and the
			 * caller passes argv[] strings.
			 */
			void stacked(char *cfgfile, char *weightfile, char *outfile)
			{
			    /* Set before parsing; -1 presumably selects the CPU path -- TODO confirm. */
			    gpu_index = -1;
			    network net = parse_network_cfg(cfgfile);
			    if(weightfile){
			        load_weights(&net, weightfile);
			    }
			    /* NOTE(review): other code on this page resets the counter through a
			     * pointer (`*nets[i].seen = 0`); confirm whether a plain assignment is
			     * intended here or a leading `*` was lost in rendering. */
			    net.seen = 0;
			    save_weights_double(net, outfile);
			}

			#include "convolutional_layer.h"
			void rescale_net(char cfgfile, char weightfile, char *outfile)
			{
			@@ -420,8 +409,6 @@
			partial(argv[2], argv[3], argv[4], atoi(argv[5]));
			} else if (0 == strcmp(argv[1], "average")){
			average(argc, argv);
			} else if (0 == strcmp(argv[1], "stacked")){
			stacked(argv[2], argv[3], argv[4]);
			} else if (0 == strcmp(argv[1], "visualize")){
			visualize(argv[2], (argc > 3) ? argv[3] : 0);
			} else if (0 == strcmp(argv[1], "imtest")){

			@@ -47,7 +47,7 @@
			for(i = 0; i < n; ++i){
			int index = rand()%m;
			random_paths[i] = paths[index];
			if(i == 0) printf("%s\n", paths[index]);
			//if(i == 0) printf("%s\n", paths[index]);
			}
			pthread_mutex_unlock(&mutex);
			return random_paths;
			@@ -58,7 +58,8 @@
			char *replace_paths = calloc(n, sizeof(char));
			int i;
			for(i = 0; i < n; ++i){
			char *replaced = find_replace(paths[i], find, replace);
			char replaced[4096];
			find_replace(paths[i], find, replace, replaced);
			replace_paths[i] = copy_string(replaced);
			}
			return replace_paths;
			@@ -198,12 +199,13 @@

			void fill_truth_swag(char path, float truth, int classes, int flip, float dx, float dy, float sx, float sy)
			{
			char *labelpath = find_replace(path, "images", "labels");
			labelpath = find_replace(labelpath, "JPEGImages", "labels");
			char labelpath[4096];
			find_replace(path, "images", "labels", labelpath);
			find_replace(labelpath, "JPEGImages", "labels", labelpath);
			find_replace(labelpath, ".jpg", ".txt", labelpath);
			find_replace(labelpath, ".JPG", ".txt", labelpath);
			find_replace(labelpath, ".JPEG", ".txt", labelpath);

			labelpath = find_replace(labelpath, ".jpg", ".txt");
			labelpath = find_replace(labelpath, ".JPG", ".txt");
			labelpath = find_replace(labelpath, ".JPEG", ".txt");
			int count = 0;
			box_label *boxes = read_boxes(labelpath, &count);
			randomize_boxes(boxes, count);
			@@ -235,13 +237,14 @@

			void fill_truth_region(char path, float truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy)
			{
			char *labelpath = find_replace(path, "images", "labels");
			labelpath = find_replace(labelpath, "JPEGImages", "labels");
			char labelpath[4096];
			find_replace(path, "images", "labels", labelpath);
			find_replace(labelpath, "JPEGImages", "labels", labelpath);

			labelpath = find_replace(labelpath, ".jpg", ".txt");
			labelpath = find_replace(labelpath, ".png", ".txt");
			labelpath = find_replace(labelpath, ".JPG", ".txt");
			labelpath = find_replace(labelpath, ".JPEG", ".txt");
			find_replace(labelpath, ".jpg", ".txt", labelpath);
			find_replace(labelpath, ".png", ".txt", labelpath);
			find_replace(labelpath, ".JPG", ".txt", labelpath);
			find_replace(labelpath, ".JPEG", ".txt", labelpath);
			int count = 0;
			box_label *boxes = read_boxes(labelpath, &count);
			randomize_boxes(boxes, count);
			@@ -282,13 +285,14 @@

			void fill_truth_detection(char path, int num_boxes, float truth, int classes, int flip, float dx, float dy, float sx, float sy)
			{
			char *labelpath = find_replace(path, "images", "labels");
			labelpath = find_replace(labelpath, "JPEGImages", "labels");
			char labelpath[4096];
			find_replace(path, "images", "labels", labelpath);
			find_replace(labelpath, "JPEGImages", "labels", labelpath);

			labelpath = find_replace(labelpath, ".jpg", ".txt");
			labelpath = find_replace(labelpath, ".png", ".txt");
			labelpath = find_replace(labelpath, ".JPG", ".txt");
			labelpath = find_replace(labelpath, ".JPEG", ".txt");
			find_replace(labelpath, ".jpg", ".txt", labelpath);
			find_replace(labelpath, ".png", ".txt", labelpath);
			find_replace(labelpath, ".JPG", ".txt", labelpath);
			find_replace(labelpath, ".JPEG", ".txt", labelpath);
			int count = 0;
			box_label *boxes = read_boxes(labelpath, &count);
			randomize_boxes(boxes, count);
			@@ -400,11 +404,12 @@
			int i;
			int count = 0;
			for(i = 0; i < n; ++i){
			char *label = find_replace(paths[i], "imgs", "labels");
			label = find_replace(label, "_iconl.jpeg", ".txt");
			char label[4096];
			find_replace(paths[i], "imgs", "labels", label);
			find_replace(label, "_iconl.jpeg", ".txt", label);
			FILE *file = fopen(label, "r");
			if(!file){
			label = find_replace(label, "labels", "labels2");
			find_replace(label, "labels", "labels2", label);
			file = fopen(label, "r");
			if(!file) continue;
			}
			@@ -518,16 +523,18 @@
			int id;
			float iou;

			char imlabel1 = find_replace(paths[i2], "imgs", "labels");
			imlabel1 = find_replace(imlabel1, "jpg", "txt");
			char imlabel1[4096];
			char imlabel2[4096];
			find_replace(paths[i*2], "imgs", "labels", imlabel1);
			find_replace(imlabel1, "jpg", "txt", imlabel1);
			FILE *fp1 = fopen(imlabel1, "r");

			while(fscanf(fp1, "%d %f", &id, &iou) == 2){
			if (d.y.vals[i][2id] < iou) d.y.vals[i][2id] = iou;
			}

			char imlabel2 = find_replace(paths[i2+1], "imgs", "labels");
			imlabel2 = find_replace(imlabel2, "jpg", "txt");
			find_replace(paths[i*2+1], "imgs", "labels", imlabel2);
			find_replace(imlabel2, "jpg", "txt", imlabel2);
			FILE *fp2 = fopen(imlabel2, "r");

			while(fscanf(fp2, "%d %f", &id, &iou) == 2){
			@@ -709,6 +716,7 @@
			{
			int i;
			load_args args = (load_args )ptr;
			if (args.threads == 0) args.threads = 1;
			data *out = args.d;
			int total = args.n;
			free(ptr);

			@@ -80,6 +80,10 @@
			l.output = calloc(l.batchout_h out_w * n, sizeof(float));
			l.delta = calloc(l.batchout_h out_w * n, sizeof(float));

			l.forward = forward_deconvolutional_layer;
			l.backward = backward_deconvolutional_layer;
			l.update = update_deconvolutional_layer;

			#ifdef GPU
			l.weights_gpu = cuda_make_array(l.weights, cnsize*size);
			l.weight_updates_gpu = cuda_make_array(l.weight_updates, cnsize*size);

			@@ -1,5 +1,6 @@
			#include "network.h"
			#include "detection_layer.h"
			#include "region_layer.h"
			#include "cost_layer.h"
			#include "utils.h"
			#include "parser.h"
			@@ -13,10 +14,10 @@
			#ifdef OPENCV
			#include "opencv2/highgui/highgui_c.h"
			#include "opencv2/imgproc/imgproc_c.h"
			void convert_detections(float predictions, int classes, int num, int square, int side, int w, int h, float thresh, float probs, box boxes, int only_objectness);
			image get_image_from_stream(CvCapture *cap);

			static char **demo_names;
			static image *demo_labels;
			static image *demo_alphabet;
			static int demo_classes;

			static float **probs;
			@@ -50,16 +51,23 @@
			{
			float nms = .4;

			detection_layer l = net.layers[net.n-1];
			layer l = net.layers[net.n-1];
			float *X = det_s.data;
			float *prediction = network_predict(net, X);

			memcpy(predictions[demo_index], prediction, l.outputs*sizeof(float));
			mean_arrays(predictions, FRAMES, l.outputs, avg);
			l.output = avg;

			free_image(det_s);
			convert_detections(avg, l.classes, l.n, l.sqrt, l.side, 1, 1, demo_thresh, probs, boxes, 0);
			if (nms > 0) do_nms(boxes, probs, l.sidel.sidel.n, l.classes, nms);
			if(l.type == DETECTION){
			get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
			} else if (l.type == REGION){
			get_region_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
			} else {
			error("Last layer must produce detections\n");
			}
			if (nms > 0) do_nms(boxes, probs, l.wl.hl.n, l.classes, nms);
			printf("\033[2J");
			printf("\033[1;1H");
			printf("\nFPS:%.1f\n",fps);
			@@ -69,7 +77,7 @@
			det = images[(demo_index + FRAMES/2 + 1)%FRAMES];
			demo_index = (demo_index + 1)%FRAMES;

			draw_detections(det, l.sidel.sidel.n, demo_thresh, boxes, probs, demo_names, demo_labels, demo_classes);
			draw_detections(det, l.wl.hl.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes);

			return 0;
			}
			@@ -83,12 +91,13 @@
			return (double)time.tv_sec + (double)time.tv_usec * .000001;
			}

			void demo(char cfgfile, char weightfile, float thresh, int cam_index, const char filename, char names, image labels, int classes, int frame_skip)
			void demo(char cfgfile, char weightfile, float thresh, int cam_index, const char filename, char names, int classes, int frame_skip, char prefix)
			{
			//skip = frame_skip;
			image *alphabet = load_alphabet();
			int delay = frame_skip;
			demo_names = names;
			demo_labels = labels;
			demo_alphabet = alphabet;
			demo_classes = classes;
			demo_thresh = thresh;
			printf("Demo\n");
			@@ -108,16 +117,16 @@

			if(!cap) error("Couldn't connect to webcam.\n");

			detection_layer l = net.layers[net.n-1];
			layer l = net.layers[net.n-1];
			int j;

			avg = (float *) calloc(l.outputs, sizeof(float));
			for(j = 0; j < FRAMES; ++j) predictions[j] = (float *) calloc(l.outputs, sizeof(float));
			for(j = 0; j < FRAMES; ++j) images[j] = make_image(1,1,3);

			boxes = (box )calloc(l.sidel.side*l.n, sizeof(box));
			probs = (float *)calloc(l.sidel.sidel.n, sizeof(float ));
			for(j = 0; j < l.sidel.sidel.n; ++j) probs[j] = (float )calloc(l.classes, sizeof(float ));
			boxes = (box )calloc(l.wl.h*l.n, sizeof(box));
			probs = (float *)calloc(l.wl.hl.n, sizeof(float ));
			for(j = 0; j < l.wl.hl.n; ++j) probs[j] = (float )calloc(l.classes, sizeof(float ));

			pthread_t fetch_thread;
			pthread_t detect_thread;
			@@ -141,9 +150,11 @@
			}

			int count = 0;
			cvNamedWindow("Demo", CV_WINDOW_NORMAL);
			cvMoveWindow("Demo", 0, 0);
			cvResizeWindow("Demo", 1352, 1013);
			if(!prefix){
			cvNamedWindow("Demo", CV_WINDOW_NORMAL);
			cvMoveWindow("Demo", 0, 0);
			cvResizeWindow("Demo", 1352, 1013);
			}

			double before = get_wall_time();

			@@ -153,7 +164,7 @@
			if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
			if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");

			if(1){
			if(!prefix){
			show_image(disp, "Demo");
			int c = cvWaitKey(1);
			if (c == 10){
			@@ -164,7 +175,7 @@
			}
			}else{
			char buff[256];
			sprintf(buff, "/home/pjreddie/tmp/bag_%07d", count);
			sprintf(buff, "%s_%08d", prefix, count);
			save_image(disp, buff);
			}

			@@ -201,7 +212,7 @@
			}
			}
			#else
			void demo(char cfgfile, char weightfile, float thresh, int cam_index, const char filename, char names, image labels, int classes, int frame_skip)
			void demo(char cfgfile, char weightfile, float thresh, int cam_index, const char filename, char names, int classes, int frame_skip, char prefix)
			{
			fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
			}

			@@ -2,6 +2,6 @@
			#define DEMO

			#include "image.h"
			void demo(char cfgfile, char weightfile, float thresh, int cam_index, const char filename, char names, image labels, int classes, int frame_skip);
			void demo(char cfgfile, char weightfile, float thresh, int cam_index, const char filename, char names, int classes, int frame_skip, char prefix);

			#endif

			@@ -30,7 +30,12 @@
			l.truths = l.sidel.side(1+l.coords+l.classes);
			l.output = calloc(batch*l.outputs, sizeof(float));
			l.delta = calloc(batch*l.outputs, sizeof(float));

			l.forward = forward_detection_layer;
			l.backward = backward_detection_layer;
			#ifdef GPU
			l.forward_gpu = forward_detection_layer_gpu;
			l.backward_gpu = backward_detection_layer_gpu;
			l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
			l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
			#endif
			@@ -216,6 +221,35 @@
			axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
			}

			/*
			 * Decode a detection layer's raw output into boxes and per-class scores.
			 *
			 * l.output is indexed here as three consecutive regions:
			 *   - class scores, l.classes values per grid cell, starting at offset 0;
			 *   - one scale value per predicted box, starting at side*side*classes;
			 *   - four values (x, y, w, h) per predicted box, starting at
			 *     side*side*(classes + n).
			 *
			 * l                layer; its side, n, classes, sqrt and output fields are read.
			 * w, h             factors the box position and size are multiplied by.
			 * thresh           scores that are not strictly greater are stored as 0.
			 * probs            output, indexed [cell*l.n + box][class].
			 * boxes            output, indexed [cell*l.n + box].
			 * only_objectness  when non-zero, probs[index][0] is overwritten with the
			 *                  box's unthresholded scale value.
			 */
			void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
			{
			    int i, j, n;
			    float *predictions = l.output;
			    for (i = 0; i < l.side*l.side; ++i){
			        int row = i / l.side;
			        int col = i % l.side;
			        /* Class scores are stored per cell and shared by all l.n boxes of it. */
			        int class_index = i*l.classes;
			        for(n = 0; n < l.n; ++n){
			            int index = i*l.n + n;
			            int p_index = l.side*l.side*l.classes + i*l.n + n;
			            float scale = predictions[p_index];
			            int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*4;
			            /* x/y are offsets inside the cell; add the cell position and normalise by side. */
			            boxes[index].x = (predictions[box_index + 0] + col) / l.side * w;
			            boxes[index].y = (predictions[box_index + 1] + row) / l.side * h;
			            /* When l.sqrt is set the stored size value is squared before scaling. */
			            boxes[index].w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w;
			            boxes[index].h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h;
			            for(j = 0; j < l.classes; ++j){
			                float prob = scale*predictions[class_index+j];
			                probs[index][j] = (prob > thresh) ? prob : 0;
			            }
			            if(only_objectness){
			                probs[index][0] = scale;
			            }
			        }
			    }
			}

			#ifdef GPU

			void forward_detection_layer_gpu(const detection_layer l, network_state state)

			@@ -9,6 +9,7 @@
			detection_layer make_detection_layer(int batch, int inputs, int n, int size, int classes, int coords, int rescore);
			void forward_detection_layer(const detection_layer l, network_state state);
			void backward_detection_layer(const detection_layer l, network_state state);
			void get_detection_boxes(layer l, int w, int h, float thresh, float *probs, box boxes, int only_objectness);

			#ifdef GPU
			void forward_detection_layer_gpu(const detection_layer l, network_state state);

			@@ -1,16 +1,16 @@
			#include "network.h"
			#include "detection_layer.h"
			#include "region_layer.h"
			#include "cost_layer.h"
			#include "utils.h"
			#include "parser.h"
			#include "box.h"
			#include "demo.h"

			#ifdef OPENCV
			#include "opencv2/highgui/highgui_c.h"
			#endif

			static char *voc_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"};
			static image voc_labels[20];

			void train_detector(char cfgfile, char weightfile)
			{
			@@ -49,13 +49,14 @@
			args.num_boxes = l.max_boxes;
			args.d = &buffer;
			args.type = DETECTION_DATA;
			args.threads = 4;

			args.angle = net.angle;
			args.exposure = net.exposure;
			args.saturation = net.saturation;
			args.hue = net.hue;

			pthread_t load_thread = load_data_in_thread(args);
			pthread_t load_thread = load_data(args);
			clock_t time;
			//while(iimgs < N120){
			while(get_current_batch(net) < net.max_batches){
			@@ -63,7 +64,7 @@
			time=clock();
			pthread_join(load_thread, 0);
			train = buffer;
			load_thread = load_data_in_thread(args);
			load_thread = load_data(args);

			/*
			int k;
			@@ -102,44 +103,6 @@
			save_weights(net, buff);
			}

			/*
			 * Decode a flat prediction buffer into boxes and per-class scores.
			 *
			 * Each predicted box occupies (classes + 5) consecutive floats in
			 * `predictions`: four box values, one scale value at offset 4, then
			 * `classes` class scores starting at offset 5.
			 *
			 * predictions      input buffer, side*side*num records of (classes + 5) floats.
			 * classes          number of class scores per box.
			 * num              boxes per grid cell.
			 * square           only affects the intermediate w/h stores, which are
			 *                  overwritten below (see the if(1) block).
			 * side             grid is side x side cells.
			 * w, h             factors the box position and size are multiplied by.
			 * thresh           scores that are not strictly greater are stored as 0.
			 * probs            output, indexed [cell*num + box][class].
			 * boxes            output, indexed [cell*num + box].
			 * only_objectness  when non-zero, probs[index][0] is overwritten with the
			 *                  box's unthresholded scale value.
			 */
			static void convert_detections(float *predictions, int classes, int num, int square, int side, int w, int h, float thresh, float **probs, box *boxes, int only_objectness)
			{
			    int i,j,n;
			    for (i = 0; i < side*side; ++i){
			        int row = i / side;
			        int col = i % side;
			        for(n = 0; n < num; ++n){
			            int index = i*num + n;
			            int p_index = index * (classes + 5) + 4;
			            float scale = predictions[p_index];
			            int box_index = index * (classes + 5);
			            boxes[index].x = (predictions[box_index + 0] + col + .5) / side * w;
			            boxes[index].y = (predictions[box_index + 1] + row + .5) / side * h;
			            if(0){
			                /* Disabled alternative decoding; never executed. */
			                boxes[index].x = (logistic_activate(predictions[box_index + 0]) + col) / side * w;
			                boxes[index].y = (logistic_activate(predictions[box_index + 1]) + row) / side * h;
			            }
			            boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (square?2:1)) * w;
			            boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (square?2:1)) * h;
			            if(1){
			                /*
			                 * Always taken: this decoding overwrites all four fields
			                 * stored above, so these are the values callers receive.
			                 */
			                boxes[index].x = ((col + .5)/side + predictions[box_index + 0] * .5) * w;
			                boxes[index].y = ((row + .5)/side + predictions[box_index + 1] * .5) * h;
			                boxes[index].w = (exp(predictions[box_index + 2]) * .5) * w;
			                boxes[index].h = (exp(predictions[box_index + 3]) * .5) * h;
			            }
			            for(j = 0; j < classes; ++j){
			                int class_index = index * (classes + 5) + 5;
			                float prob = scale*predictions[class_index+j];
			                probs[index][j] = (prob > thresh) ? prob : 0;
			            }
			            if(only_objectness){
			                probs[index][0] = scale;
			            }
			        }
			    }
			}

			void print_detector_detections(FILE *fps, char id, box boxes, float *probs, int total, int classes, int w, int h)
			{
			int i, j;
			@@ -179,7 +142,6 @@

			layer l = net.layers[net.n-1];
			int classes = l.classes;
			int side = l.w;

			int j;
			FILE *fps = calloc(classes, sizeof(FILE ));
			@@ -188,9 +150,9 @@
			snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
			fps[j] = fopen(buff, "w");
			}
			box boxes = calloc(sideside*l.n, sizeof(box));
			float *probs = calloc(sidesidel.n, sizeof(float ));
			for(j = 0; j < sidesidel.n; ++j) probs[j] = calloc(classes, sizeof(float *));
			box boxes = calloc(l.wl.h*l.n, sizeof(box));
			float *probs = calloc(l.wl.hl.n, sizeof(float ));
			for(j = 0; j < l.wl.hl.n; ++j) probs[j] = calloc(classes, sizeof(float *));

			int m = plist->size;
			int i=0;
			@@ -235,12 +197,12 @@
			char *path = paths[i+t-nthreads];
			char *id = basecfg(path);
			float *X = val_resized[t].data;
			float *predictions = network_predict(net, X);
			network_predict(net, X);
			int w = val[t].w;
			int h = val[t].h;
			convert_detections(predictions, classes, l.n, 0, side, w, h, thresh, probs, boxes, 0);
			if (nms) do_nms_sort(boxes, probs, sidesidel.n, classes, nms);
			print_detector_detections(fps, id, boxes, probs, sidesidel.n, classes, w, h);
			get_region_boxes(l, w, h, thresh, probs, boxes, 0);
			if (nms) do_nms_sort(boxes, probs, l.wl.hl.n, classes, nms);
			print_detector_detections(fps, id, boxes, probs, l.wl.hl.n, classes, w, h);
			free(id);
			free_image(val[t]);
			free_image(val_resized[t]);
			@@ -268,8 +230,6 @@

			layer l = net.layers[net.n-1];
			int classes = l.classes;
			int square = l.sqrt;
			int side = l.side;

			int j, k;
			FILE *fps = calloc(classes, sizeof(FILE ));
			@@ -278,9 +238,9 @@
			snprintf(buff, 1024, "%s%s.txt", base, voc_names[j]);
			fps[j] = fopen(buff, "w");
			}
			box boxes = calloc(sideside*l.n, sizeof(box));
			float *probs = calloc(sidesidel.n, sizeof(float ));
			for(j = 0; j < sidesidel.n; ++j) probs[j] = calloc(classes, sizeof(float *));
			box boxes = calloc(l.wl.h*l.n, sizeof(box));
			float *probs = calloc(l.wl.hl.n, sizeof(float ));
			for(j = 0; j < l.wl.hl.n; ++j) probs[j] = calloc(classes, sizeof(float *));

			int m = plist->size;
			int i=0;
			@@ -299,18 +259,19 @@
			image orig = load_image_color(path, 0, 0);
			image sized = resize_image(orig, net.w, net.h);
			char *id = basecfg(path);
			float *predictions = network_predict(net, sized.data);
			convert_detections(predictions, classes, l.n, square, l.w, 1, 1, thresh, probs, boxes, 1);
			if (nms) do_nms(boxes, probs, sidesidel.n, 1, nms);
			network_predict(net, sized.data);
			get_region_boxes(l, 1, 1, thresh, probs, boxes, 1);
			if (nms) do_nms(boxes, probs, l.wl.hl.n, 1, nms);

			char *labelpath = find_replace(path, "images", "labels");
			labelpath = find_replace(labelpath, "JPEGImages", "labels");
			labelpath = find_replace(labelpath, ".jpg", ".txt");
			labelpath = find_replace(labelpath, ".JPEG", ".txt");
			char labelpath[4096];
			find_replace(path, "images", "labels", labelpath);
			find_replace(labelpath, "JPEGImages", "labels", labelpath);
			find_replace(labelpath, ".jpg", ".txt", labelpath);
			find_replace(labelpath, ".JPEG", ".txt", labelpath);

			int num_labels = 0;
			box_label *truth = read_boxes(labelpath, &num_labels);
			for(k = 0; k < sidesidel.n; ++k){
			for(k = 0; k < l.wl.hl.n; ++k){
			if(probs[k][0] > thresh){
			++proposals;
			}
			@@ -319,7 +280,7 @@
			++total;
			box t = {truth[j].x, truth[j].y, truth[j].w, truth[j].h};
			float best_iou = 0;
			for(k = 0; k < sidesidel.n; ++k){
			for(k = 0; k < l.wl.hl.n; ++k){
			float iou = box_iou(boxes[k], t);
			if(probs[k][0] > thresh && iou > best_iou){
			best_iou = iou;
			@@ -340,13 +301,12 @@

			void test_detector(char cfgfile, char weightfile, char *filename, float thresh)
			{

			image *alphabet = load_alphabet();
			network net = parse_network_cfg(cfgfile);
			if(weightfile){
			load_weights(&net, weightfile);
			}
			detection_layer l = net.layers[net.n-1];
			l.side = l.w;
			layer l = net.layers[net.n-1];
			set_batch_network(&net, 1);
			srand(2222222);
			clock_t time;
			@@ -354,9 +314,9 @@
			char *input = buff;
			int j;
			float nms=.4;
			box boxes = calloc(l.sidel.side*l.n, sizeof(box));
			float *probs = calloc(l.sidel.sidel.n, sizeof(float ));
			for(j = 0; j < l.sidel.sidel.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
			box boxes = calloc(l.wl.h*l.n, sizeof(box));
			float *probs = calloc(l.wl.hl.n, sizeof(float ));
			for(j = 0; j < l.wl.hl.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
			while(1){
			if(filename){
			strncpy(input, filename, 256);
			@@ -371,12 +331,12 @@
			image sized = resize_image(im, net.w, net.h);
			float *X = sized.data;
			time=clock();
			float *predictions = network_predict(net, X);
			network_predict(net, X);
			printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
			convert_detections(predictions, l.classes, l.n, 0, l.w, 1, 1, thresh, probs, boxes, 0);
			if (nms) do_nms_sort(boxes, probs, l.sidel.sidel.n, l.classes, nms);
			//draw_detections(im, l.sidel.sidel.n, thresh, boxes, probs, voc_names, voc_labels, 20);
			draw_detections(im, l.sidel.sidel.n, thresh, boxes, probs, voc_names, voc_labels, 20);
			get_region_boxes(l, 1, 1, thresh, probs, boxes, 0);
			if (nms) do_nms_sort(boxes, probs, l.wl.hl.n, l.classes, nms);
			//draw_detections(im, l.wl.hl.n, thresh, boxes, probs, voc_names, voc_labels, 20);
			draw_detections(im, l.wl.hl.n, thresh, boxes, probs, voc_names, alphabet, 20);
			save_image(im, "predictions");
			show_image(im, "predictions");

			@@ -392,14 +352,10 @@

			void run_detector(int argc, char **argv)
			{
			int i;
			for(i = 0; i < 20; ++i){
			char buff[256];
			sprintf(buff, "data/labels/%s.png", voc_names[i]);
			voc_labels[i] = load_image_color(buff, 0, 0);
			}

			char *prefix = find_char_arg(argc, argv, "-prefix", 0);
			float thresh = find_float_arg(argc, argv, "-thresh", .2);
			int cam_index = find_int_arg(argc, argv, "-c", 0);
			int frame_skip = find_int_arg(argc, argv, "-s", 0);
			if(argc < 4){
			fprintf(stderr, "usage: %s %s [train/test/valid] [cfg] [weights (optional)]\n", argv[0], argv[1]);
			return;
			@@ -412,4 +368,5 @@
			else if(0==strcmp(argv[2], "train")) train_detector(cfg, weights);
			else if(0==strcmp(argv[2], "valid")) validate_detector(cfg, weights);
			else if(0==strcmp(argv[2], "recall")) validate_detector_recall(cfg, weights);
			else if(0==strcmp(argv[2], "demo")) demo(cfg, weights, thresh, cam_index, filename, voc_names, 20, frame_skip, prefix);
			}

			@@ -15,7 +15,11 @@
			l.batch = batch;
			l.rand = calloc(inputs*batch, sizeof(float));
			l.scale = 1./(1.-probability);
			l.forward = forward_dropout_layer;
			l.backward = backward_dropout_layer;
			#ifdef GPU
			l.forward_gpu = forward_dropout_layer_gpu;
			l.backward_gpu = backward_dropout_layer_gpu;
			l.rand_gpu = cuda_make_array(l.rand, inputs*batch);
			#endif
			return l;

			@@ -85,7 +85,15 @@
			l.z_cpu = calloc(outputs*batch, sizeof(float));
			l.h_cpu = calloc(outputs*batch, sizeof(float));

			l.forward = forward_gru_layer;
			l.backward = backward_gru_layer;
			l.update = update_gru_layer;

			#ifdef GPU
			l.forward_gpu = forward_gru_layer_gpu;
			l.backward_gpu = backward_gru_layer_gpu;
			l.update_gpu = update_gru_layer_gpu;

			l.forgot_state_gpu = cuda_make_array(l.output, batch*outputs);
			l.forgot_delta_gpu = cuda_make_array(l.output, batch*outputs);
			l.prev_state_gpu = cuda_make_array(l.output, batch*outputs);

			@@ -1,24 +1,23 @@

			#ifndef RNN_LAYER_H
			#define RNN_LAYER_H
			#ifndef GRU_LAYER_H
			#define GRU_LAYER_H

			#include "activations.h"
			#include "layer.h"
			#include "network.h"
			#define USET

			layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log);
			layer make_gru_layer(int batch, int inputs, int outputs, int steps, int batch_normalize);

			void forward_rnn_layer(layer l, network_state state);
			void backward_rnn_layer(layer l, network_state state);
			void update_rnn_layer(layer l, int batch, float learning_rate, float momentum, float decay);
			void forward_gru_layer(layer l, network_state state);
			void backward_gru_layer(layer l, network_state state);
			void update_gru_layer(layer l, int batch, float learning_rate, float momentum, float decay);

			#ifdef GPU
			void forward_rnn_layer_gpu(layer l, network_state state);
			void backward_rnn_layer_gpu(layer l, network_state state);
			void update_rnn_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
			void push_rnn_layer(layer l);
			void pull_rnn_layer(layer l);
			void forward_gru_layer_gpu(layer l, network_state state);
			void backward_gru_layer_gpu(layer l, network_state state);
			void update_gru_layer_gpu(layer l, int batch, float learning_rate, float momentum, float decay);
			void push_gru_layer(layer l);
			void pull_gru_layer(layer l);
			#endif

			#endif

			@@ -10,6 +10,12 @@
			#define STB_IMAGE_WRITE_IMPLEMENTATION
			#include "stb_image_write.h"

			#ifdef OPENCV
			#include "opencv2/highgui/highgui_c.h"
			#include "opencv2/imgproc/imgproc_c.h"
			#endif


			int windows = 0;

			float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} };
			@@ -25,10 +31,66 @@
			return r;
			}

			/*
			 * Multiply `source` into `dest` with its top-left corner at (dx, dy).
			 *
			 * For every pixel of `source`, the destination value at the shifted
			 * position is read with get_pixel_extend, multiplied by the source value,
			 * and written back with set_pixel.
			 */
			void composite_image(image source, image dest, int dx, int dy)
			{
			    int col, row, ch;
			    for(ch = 0; ch < source.c; ++ch){
			        for(row = 0; row < source.h; ++row){
			            int dest_y = dy + row;
			            for(col = 0; col < source.w; ++col){
			                int dest_x = dx + col;
			                float src_val = get_pixel(source, col, row, ch);
			                float dst_val = get_pixel_extend(dest, dest_x, dest_y, ch);
			                set_pixel(dest, dest_x, dest_y, ch, src_val * dst_val);
			            }
			        }
			    }
			}

			/*
			 * Return a new image that is `a` surrounded by a margin of `border` pixels
			 * on every side.
			 *
			 * The result is (a.w + 2*border) x (a.h + 2*border) with a.c channels. Every
			 * output pixel is read from `a` at the position shifted by -border through
			 * get_pixel_extend, so the margin takes whatever that helper returns for
			 * out-of-range coordinates. `a` is not modified; the caller owns the result.
			 */
			image border_image(image a, int border)
			{
			    image b = make_image(a.w + 2*border, a.h + 2*border, a.c);
			    int x,y,k;
			    for(k = 0; k < b.c; ++k){
			        for(y = 0; y < b.h; ++y){
			            for(x = 0; x < b.w; ++x){
			                float val = get_pixel_extend(a, x - border, y - border, k);
			                set_pixel(b, x, y, k, val);
			            }
			        }
			    }
			    return b;
			}

			/*
			 * Place `b` to the right of `a`, separated by `dx` pixels (dx may be
			 * negative to overlap), and return the combined image.
			 *
			 * When a.w is 0 the result is simply a copy of `b`. Otherwise the canvas is
			 * a.w + b.w + dx wide, as tall and as deep as the larger of the two inputs,
			 * and is filled with 1 before `a` is embedded at the origin and `b` is
			 * multiplied in at x = a.w + dx. Neither input is modified; the caller owns
			 * the returned image.
			 */
			image tile_images(image a, image b, int dx)
			{
			    if(a.w == 0) return copy_image(b);
			    image c = make_image(a.w + b.w + dx, (a.h > b.h) ? a.h : b.h, (a.c > b.c) ? a.c : b.c);
			    /* Start from all ones so that compositing b by multiplication leaves it unchanged. */
			    fill_cpu(c.w*c.h*c.c, 1, c.data, 1);
			    embed_image(a, c, 0, 0);
			    composite_image(b, c, a.w + dx, 0);
			    return c;
			}

			/*
			 * Render `string` as an image by tiling one glyph image per character.
			 *
			 * characters  glyph table indexed by character code.
			 * string      NUL-terminated text to render.
			 *
			 * Glyphs are appended left to right with a horizontal offset of -4 pixels,
			 * and the finished strip is wrapped in a border of a quarter of its height.
			 * Intermediate images are freed; the caller owns the returned image.
			 *
			 * NOTE(review): the glyph index is the plain `char` value, so bytes outside
			 * the range the table was built for (or negative on signed-char platforms)
			 * index out of bounds; callers must pass text the table covers.
			 */
			image get_label(image *characters, char *string)
			{
			    image label = make_empty_image(0,0,0);
			    while(*string){
			        image glyph = characters[(int)*string];
			        image joined = tile_images(label, glyph, -4);
			        free_image(label);
			        label = joined;
			        ++string;
			    }
			    image b = border_image(label, label.h*.25);
			    free_image(label);
			    return b;
			}

			void draw_label(image a, int r, int c, image label, const float *rgb)
			{
			float ratio = (float) label.w / label.h;
			int h = label.h;
			int h = a.h * .04;
			h = label.h;
			h = a.h * .06;
			int w = ratio * h;
			image rl = resize_image(label, w, h);
			if (r - h >= 0) r = r - h;
			@@ -102,7 +164,19 @@
			}
			}

			void draw_detections(image im, int num, float thresh, box boxes, float probs, char names, image labels, int classes)
			/*
			 * Load the glyph images for character codes 32 through 126.
			 *
			 * Returns a zero-initialised table of 128 images in which entry `code` is
			 * loaded from "data/labels/<code>.png"; entries outside 32..126 are left
			 * zeroed. The caller owns the returned table.
			 */
			image *load_alphabet()
			{
			    image *glyphs = calloc(128, sizeof(image));
			    int code = 32;
			    while(code < 127){
			        char path[256];
			        sprintf(path, "data/labels/%d.png", code);
			        glyphs[code] = load_image_color(path, 0, 0);
			        ++code;
			    }
			    return glyphs;
			}

			void draw_detections(image im, int num, float thresh, box boxes, float probs, char names, image alphabet, int classes)
			{
			int i;

			@@ -111,7 +185,7 @@
			float prob = probs[i][class];
			if(prob > thresh){
			//int width = pow(prob, 1./2.)*30+1;
			int width = 8;
			int width = im.h * .012;
			printf("%s: %.0f%%\n", names[class], prob*100);
			int offset = class*1 % classes;
			float red = get_color(2,offset,classes);
			@@ -137,7 +211,10 @@
			if(bot > im.h-1) bot = im.h-1;

			draw_box_width(im, left, top, right, bot, width, red, green, blue);
			if (labels) draw_label(im, top + width, left, labels[class], rgb);
			if (alphabet) {
			image label = get_label(alphabet, names[class]);
			draw_label(im, top + width, left, label, rgb);
			}
			}
			}
			}
			@@ -368,6 +445,53 @@
			}

			#ifdef OPENCV

			/*
			 * Convert an OpenCV IplImage into a newly allocated `image`.
			 *
			 * The source bytes are read interleaved per pixel (index i*step + j*c + k,
			 * with `step` the row stride in bytes) and written channel by channel into
			 * out.data, each divided by 255. Channel order is not changed here. The
			 * caller owns the returned image; `src` is not released.
			 */
			image ipl_to_image(IplImage* src)
			{
			    unsigned char *data = (unsigned char *)src->imageData;
			    int h = src->height;
			    int w = src->width;
			    int c = src->nChannels;
			    int step = src->widthStep;
			    image out = make_image(w, h, c);
			    int i, j, k, count=0;

			    for(k= 0; k < c; ++k){
			        for(i = 0; i < h; ++i){
			            for(j = 0; j < w; ++j){
			                out.data[count++] = data[i*step + j*c + k]/255.;
			            }
			        }
			    }
			    return out;
			}

			/*
			 * Load an image file through OpenCV and convert it to an `image`.
			 *
			 * filename  path of the file to load.
			 * channels  0 loads the file as stored, 1 forces one channel, 3 forces
			 *           three channels; any other value is reported on stderr and the
			 *           file is loaded as stored.
			 *
			 * On failure the filename is appended to "bad.list" and a 10x10x3
			 * placeholder image is returned instead of aborting. On success the result
			 * is passed through rgbgr_image before being returned. The caller owns the
			 * returned image.
			 */
			image load_image_cv(char *filename, int channels)
			{
			    IplImage* src = 0;
			    /* cvLoadImage flag: <0 as stored, 0 grayscale, >0 three-channel colour. */
			    int flag = -1;
			    if (channels == 1) flag = 0;
			    else if (channels == 3) flag = 1;
			    else if (channels != 0) {
			        fprintf(stderr, "OpenCV can't force load with %d channels\n", channels);
			    }

			    src = cvLoadImage(filename, flag);
			    if(src == 0)
			    {
			        fprintf(stderr, "Cannot load image \"%s\"\n", filename);
			        /*
			         * Record the failure directly instead of formatting a shell command:
			         * the filename is arbitrary input, so sprintf into a fixed buffer
			         * could overflow and system() would let the shell interpret it.
			         */
			        FILE *bad = fopen("bad.list", "a");
			        if(bad){
			            fprintf(bad, "%s\n", filename);
			            fclose(bad);
			        }
			        return make_image(10,10,3);
			    }
			    image out = ipl_to_image(src);
			    cvReleaseImage(&src);
			    rgbgr_image(out);
			    return out;
			}

			image get_image_from_stream(CvCapture *cap)
			{
			IplImage* src = cvQueryFrame(cap);
			@@ -376,9 +500,7 @@
			rgbgr_image(im);
			return im;
			}
			#endif

			#ifdef OPENCV
			void save_image_jpg(image p, const char *name)
			{
			image copy = copy_image(p);
			@@ -980,7 +1102,7 @@
			image aug = random_augment_image(im, 0, 320, 448, 320, .75);
			show_image(aug, "aug");
			free_image(aug);



			float exposure = 1.15;
			float saturation = 1.15;
			@@ -1001,55 +1123,6 @@
			#endif
			}

			#ifdef OPENCV
			/*
			 * Convert an OpenCV IplImage into a newly allocated `image`.
			 *
			 * The source bytes are read interleaved per pixel (index i*step + j*c + k,
			 * with `step` the row stride in bytes) and written channel by channel into
			 * out.data, each divided by 255. Channel order is not changed here. The
			 * caller owns the returned image; `src` is not released.
			 */
			image ipl_to_image(IplImage* src)
			{
			    unsigned char *data = (unsigned char *)src->imageData;
			    int h = src->height;
			    int w = src->width;
			    int c = src->nChannels;
			    int step = src->widthStep;
			    image out = make_image(w, h, c);
			    int i, j, k, count=0;

			    for(k= 0; k < c; ++k){
			        for(i = 0; i < h; ++i){
			            for(j = 0; j < w; ++j){
			                out.data[count++] = data[i*step + j*c + k]/255.;
			            }
			        }
			    }
			    return out;
			}

			/*
			 * Load an image file through OpenCV and convert it to an `image`.
			 *
			 * filename  path of the file to load.
			 * channels  0 loads the file as stored, 1 forces one channel, 3 forces
			 *           three channels; any other value is reported on stderr and the
			 *           file is loaded as stored.
			 *
			 * On failure the filename is appended to "bad.list" and a 10x10x3
			 * placeholder image is returned instead of aborting. On success the result
			 * is passed through rgbgr_image before being returned. The caller owns the
			 * returned image.
			 */
			image load_image_cv(char *filename, int channels)
			{
			    IplImage* src = 0;
			    /* cvLoadImage flag: <0 as stored, 0 grayscale, >0 three-channel colour. */
			    int flag = -1;
			    if (channels == 1) flag = 0;
			    else if (channels == 3) flag = 1;
			    else if (channels != 0) {
			        fprintf(stderr, "OpenCV can't force load with %d channels\n", channels);
			    }

			    src = cvLoadImage(filename, flag);
			    if(src == 0)
			    {
			        fprintf(stderr, "Cannot load image \"%s\"\n", filename);
			        /*
			         * Record the failure directly instead of formatting a shell command:
			         * the filename is arbitrary input, so sprintf into a fixed buffer
			         * could overflow and system() would let the shell interpret it.
			         */
			        FILE *bad = fopen("bad.list", "a");
			        if(bad){
			            fprintf(bad, "%s\n", filename);
			            fclose(bad);
			        }
			        return make_image(10,10,3);
			    }
			    image out = ipl_to_image(src);
			    cvReleaseImage(&src);
			    rgbgr_image(out);
			    return out;
			}

			#endif


			image load_image_stb(char *filename, int channels)
			{
			@@ -1122,6 +1195,7 @@
			}
			/*
			 * Store `val` at column x, row y of channel c in image m.
			 *
			 * Coordinates outside the image are silently ignored, so callers may draw
			 * partially off-canvas. Although `m` is passed by value, the write goes
			 * through m.data and is therefore visible to the caller. Pixels are
			 * addressed channel by channel, each channel row by row.
			 */
			void set_pixel(image m, int x, int y, int c, float val)
			{
			    if (x < 0 || y < 0 || c < 0 || x >= m.w || y >= m.h || c >= m.c) return;
			    /* Kept as a debug-build sanity check; the guard above already ensures it. */
			    assert(x < m.w && y < m.h && c < m.c);
			    m.data[c*m.h*m.w + y*m.w + x] = val;
			}
			@@ -1247,5 +1321,7 @@

			/*
			 * Release the pixel buffer owned by image m.
			 *
			 * The buffer is freed exactly once, and only when it is non-NULL. `m` is
			 * passed by value, so the caller's copy still holds the stale pointer
			 * afterwards and must not be used or freed again.
			 */
			void free_image(image m)
			{
			    if(m.data){
			        free(m.data);
			    }
			}

			@@ -8,11 +8,6 @@
			#include <math.h>
			#include "box.h"

			#ifdef OPENCV
			#include "opencv2/highgui/highgui_c.h"
			#include "opencv2/imgproc/imgproc_c.h"
			#endif

			typedef struct {
			int h;
			int w;
			@@ -26,6 +21,7 @@
			void draw_box_width(image a, int x1, int y1, int x2, int y2, int w, float r, float g, float b);
			void draw_bbox(image a, box bbox, int w, float r, float g, float b);
			void draw_label(image a, int r, int c, image label, const float *rgb);
			void write_label(image a, int r, int c, image characters, char string, float *rgb);
			void draw_detections(image im, int num, float thresh, box boxes, float probs, char names, image labels, int classes);
			image image_distance(image a, image b);
			void scale_image(image m, float s);
			@@ -64,12 +60,6 @@
			void show_image_layers(image p, char *name);
			void show_image_collapsed(image p, char *name);

			#ifdef OPENCV
			void save_image_jpg(image p, const char *name);
			image get_image_from_stream(CvCapture *cap);
			image ipl_to_image(IplImage* src);
			#endif

			void print_image(image m);

			image make_image(int w, int h, int c);
			@@ -79,6 +69,7 @@
			image copy_image(image p);
			image load_image(char *filename, int w, int h, int c);
			image load_image_color(char *filename, int w, int h);
			image *load_alphabet();

			float get_pixel(image m, int x, int y, int c);
			float get_pixel_extend(image m, int x, int y, int c);

			@@ -4,6 +4,8 @@
			#include "activations.h"
			#include "stddef.h"

			struct network_state;

			struct layer;
			typedef struct layer layer;

			@@ -42,6 +44,12 @@
			LAYER_TYPE type;
			ACTIVATION activation;
			COST_TYPE cost_type;
			void (*forward) (struct layer, struct network_state);
			void (*backward) (struct layer, struct network_state);
			void (*update) (struct layer, int, float, float, float);
			void (*forward_gpu) (struct layer, struct network_state);
			void (*backward_gpu) (struct layer, struct network_state);
			void (*update_gpu) (struct layer, int, float, float, float);
			int batch_normalize;
			int shortcut;
			int batch;

			@@ -60,8 +60,16 @@
			l.col_image = calloc(out_hout_wsizesizec, sizeof(float));
			l.output = calloc(l.batchout_h out_w * n, sizeof(float));
			l.delta = calloc(l.batchout_h out_w * n, sizeof(float));

			l.forward = forward_local_layer;
			l.backward = backward_local_layer;
			l.update = update_local_layer;

			#ifdef GPU
			l.forward_gpu = forward_local_layer_gpu;
			l.backward_gpu = backward_local_layer_gpu;
			l.update_gpu = update_local_layer_gpu;

			l.weights_gpu = cuda_make_array(l.weights, cnsizesizelocations);
			l.weight_updates_gpu = cuda_make_array(l.weight_updates, cnsizesizelocations);

			@@ -39,7 +39,11 @@
			l.indexes = calloc(output_size, sizeof(int));
			l.output = calloc(output_size, sizeof(float));
			l.delta = calloc(output_size, sizeof(float));
			l.forward = forward_maxpool_layer;
			l.backward = backward_maxpool_layer;
			#ifdef GPU
			l.forward_gpu = forward_maxpool_layer_gpu;
			l.backward_gpu = backward_maxpool_layer_gpu;
			l.indexes_gpu = cuda_make_int_array(output_size);
			l.output_gpu = cuda_make_array(l.output, output_size);
			l.delta_gpu = cuda_make_array(l.delta, output_size);

			@@ -15,7 +15,6 @@
			#include "local_layer.h"
			#include "convolutional_layer.h"
			#include "activation_layer.h"
			#include "deconvolutional_layer.h"
			#include "detection_layer.h"
			#include "region_layer.h"
			#include "normalization_layer.h"
			@@ -153,49 +152,7 @@
			if(l.delta){
			scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
			}
			if(l.type == CONVOLUTIONAL){
			forward_convolutional_layer(l, state);
			} else if(l.type == DECONVOLUTIONAL){
			forward_deconvolutional_layer(l, state);
			} else if(l.type == ACTIVE){
			forward_activation_layer(l, state);
			} else if(l.type == LOCAL){
			forward_local_layer(l, state);
			} else if(l.type == NORMALIZATION){
			forward_normalization_layer(l, state);
			} else if(l.type == BATCHNORM){
			forward_batchnorm_layer(l, state);
			} else if(l.type == DETECTION){
			forward_detection_layer(l, state);
			} else if(l.type == REGION){
			forward_region_layer(l, state);
			} else if(l.type == CONNECTED){
			forward_connected_layer(l, state);
			} else if(l.type == RNN){
			forward_rnn_layer(l, state);
			} else if(l.type == GRU){
			forward_gru_layer(l, state);
			} else if(l.type == CRNN){
			forward_crnn_layer(l, state);
			} else if(l.type == CROP){
			forward_crop_layer(l, state);
			} else if(l.type == COST){
			forward_cost_layer(l, state);
			} else if(l.type == SOFTMAX){
			forward_softmax_layer(l, state);
			} else if(l.type == MAXPOOL){
			forward_maxpool_layer(l, state);
			} else if(l.type == REORG){
			forward_reorg_layer(l, state);
			} else if(l.type == AVGPOOL){
			forward_avgpool_layer(l, state);
			} else if(l.type == DROPOUT){
			forward_dropout_layer(l, state);
			} else if(l.type == ROUTE){
			forward_route_layer(l, net);
			} else if(l.type == SHORTCUT){
			forward_shortcut_layer(l, state);
			}
			l.forward(l, state);
			state.input = l.output;
			}
			}
			@@ -207,29 +164,17 @@
			float rate = get_current_rate(net);
			for(i = 0; i < net.n; ++i){
			layer l = net.layers[i];
			if(l.type == CONVOLUTIONAL){
			update_convolutional_layer(l, update_batch, rate, net.momentum, net.decay);
			} else if(l.type == DECONVOLUTIONAL){
			update_deconvolutional_layer(l, rate, net.momentum, net.decay);
			} else if(l.type == CONNECTED){
			update_connected_layer(l, update_batch, rate, net.momentum, net.decay);
			} else if(l.type == RNN){
			update_rnn_layer(l, update_batch, rate, net.momentum, net.decay);
			} else if(l.type == GRU){
			update_gru_layer(l, update_batch, rate, net.momentum, net.decay);
			} else if(l.type == CRNN){
			update_crnn_layer(l, update_batch, rate, net.momentum, net.decay);
			} else if(l.type == LOCAL){
			update_local_layer(l, update_batch, rate, net.momentum, net.decay);
			if(l.update){
			l.update(l, update_batch, rate, net.momentum, net.decay);
			}
			}
			}

			float *get_network_output(network net)
			{
			#ifdef GPU
			if (gpu_index >= 0) return get_network_output_gpu(net);
			#endif
			#ifdef GPU
			if (gpu_index >= 0) return get_network_output_gpu(net);
			#endif
			int i;
			for(i = net.n-1; i > 0; --i) if(net.layers[i].type != COST) break;
			return net.layers[i].output;
			@@ -273,47 +218,7 @@
			state.delta = prev.delta;
			}
			layer l = net.layers[i];
			if(l.type == CONVOLUTIONAL){
			backward_convolutional_layer(l, state);
			} else if(l.type == DECONVOLUTIONAL){
			backward_deconvolutional_layer(l, state);
			} else if(l.type == ACTIVE){
			backward_activation_layer(l, state);
			} else if(l.type == NORMALIZATION){
			backward_normalization_layer(l, state);
			} else if(l.type == BATCHNORM){
			backward_batchnorm_layer(l, state);
			} else if(l.type == MAXPOOL){
			if(i != 0) backward_maxpool_layer(l, state);
			} else if(l.type == REORG){
			backward_reorg_layer(l, state);
			} else if(l.type == AVGPOOL){
			backward_avgpool_layer(l, state);
			} else if(l.type == DROPOUT){
			backward_dropout_layer(l, state);
			} else if(l.type == DETECTION){
			backward_detection_layer(l, state);
			} else if(l.type == REGION){
			backward_region_layer(l, state);
			} else if(l.type == SOFTMAX){
			if(i != 0) backward_softmax_layer(l, state);
			} else if(l.type == CONNECTED){
			backward_connected_layer(l, state);
			} else if(l.type == RNN){
			backward_rnn_layer(l, state);
			} else if(l.type == GRU){
			backward_gru_layer(l, state);
			} else if(l.type == CRNN){
			backward_crnn_layer(l, state);
			} else if(l.type == LOCAL){
			backward_local_layer(l, state);
			} else if(l.type == COST){
			backward_cost_layer(l, state);
			} else if(l.type == ROUTE){
			backward_route_layer(l, net);
			} else if(l.type == SHORTCUT){
			backward_shortcut_layer(l, state);
			}
			l.backward(l, state);
			}
			}

			@@ -406,11 +311,11 @@
			int i;
			for(i = 0; i < net->n; ++i){
			net->layers[i].batch = b;
			#ifdef CUDNN
			#ifdef CUDNN
			if(net->layers[i].type == CONVOLUTIONAL){
			cudnn_convolutional_setup(net->layers + i);
			}
			#endif
			#endif
			}
			}