Joseph Redmon
2016-07-19 9361292c429c0ba3400c31c7fa5d5e3d3cb6ab47
updates
99 files modified
163 ■■■■ changed files
Makefile 8 ●●●● patch | view | raw | blame | history
data/labels/aeroplane.png patch | view | raw | blame | history
data/labels/airplane.png patch | view | raw | blame | history
data/labels/apple.png patch | view | raw | blame | history
data/labels/backpack.png patch | view | raw | blame | history
data/labels/banana.png patch | view | raw | blame | history
data/labels/baseball bat.png patch | view | raw | blame | history
data/labels/baseball glove.png patch | view | raw | blame | history
data/labels/bear.png patch | view | raw | blame | history
data/labels/bed.png patch | view | raw | blame | history
data/labels/bench.png patch | view | raw | blame | history
data/labels/bicycle.png patch | view | raw | blame | history
data/labels/bird.png patch | view | raw | blame | history
data/labels/boat.png patch | view | raw | blame | history
data/labels/book.png patch | view | raw | blame | history
data/labels/bottle.png patch | view | raw | blame | history
data/labels/bowl.png patch | view | raw | blame | history
data/labels/broccoli.png patch | view | raw | blame | history
data/labels/bus.png patch | view | raw | blame | history
data/labels/cake.png patch | view | raw | blame | history
data/labels/car.png patch | view | raw | blame | history
data/labels/carrot.png patch | view | raw | blame | history
data/labels/cat.png patch | view | raw | blame | history
data/labels/cell phone.png patch | view | raw | blame | history
data/labels/chair.png patch | view | raw | blame | history
data/labels/clock.png patch | view | raw | blame | history
data/labels/couch.png patch | view | raw | blame | history
data/labels/cow.png patch | view | raw | blame | history
data/labels/cup.png patch | view | raw | blame | history
data/labels/dining table.png patch | view | raw | blame | history
data/labels/diningtable.png patch | view | raw | blame | history
data/labels/dog.png patch | view | raw | blame | history
data/labels/donut.png patch | view | raw | blame | history
data/labels/elephant.png patch | view | raw | blame | history
data/labels/fire hydrant.png patch | view | raw | blame | history
data/labels/fork.png patch | view | raw | blame | history
data/labels/frisbee.png patch | view | raw | blame | history
data/labels/giraffe.png patch | view | raw | blame | history
data/labels/hair drier.png patch | view | raw | blame | history
data/labels/handbag.png patch | view | raw | blame | history
data/labels/horse.png patch | view | raw | blame | history
data/labels/hot dog.png patch | view | raw | blame | history
data/labels/keyboard.png patch | view | raw | blame | history
data/labels/kite.png patch | view | raw | blame | history
data/labels/knife.png patch | view | raw | blame | history
data/labels/laptop.png patch | view | raw | blame | history
data/labels/microwave.png patch | view | raw | blame | history
data/labels/motorbike.png patch | view | raw | blame | history
data/labels/motorcycle.png patch | view | raw | blame | history
data/labels/mouse.png patch | view | raw | blame | history
data/labels/orange.png patch | view | raw | blame | history
data/labels/oven.png patch | view | raw | blame | history
data/labels/parking meter.png patch | view | raw | blame | history
data/labels/person.png patch | view | raw | blame | history
data/labels/pizza.png patch | view | raw | blame | history
data/labels/potted plant.png patch | view | raw | blame | history
data/labels/pottedplant.png patch | view | raw | blame | history
data/labels/refrigerator.png patch | view | raw | blame | history
data/labels/remote.png patch | view | raw | blame | history
data/labels/sandwich.png patch | view | raw | blame | history
data/labels/scissors.png patch | view | raw | blame | history
data/labels/sheep.png patch | view | raw | blame | history
data/labels/sink.png patch | view | raw | blame | history
data/labels/skateboard.png patch | view | raw | blame | history
data/labels/skis.png patch | view | raw | blame | history
data/labels/snowboard.png patch | view | raw | blame | history
data/labels/sofa.png patch | view | raw | blame | history
data/labels/spoon.png patch | view | raw | blame | history
data/labels/sports ball.png patch | view | raw | blame | history
data/labels/stop sign.png patch | view | raw | blame | history
data/labels/suitcase.png patch | view | raw | blame | history
data/labels/surfboard.png patch | view | raw | blame | history
data/labels/teddy bear.png patch | view | raw | blame | history
data/labels/tennis racket.png patch | view | raw | blame | history
data/labels/tie.png patch | view | raw | blame | history
data/labels/toaster.png patch | view | raw | blame | history
data/labels/toilet.png patch | view | raw | blame | history
data/labels/toothbrush.png patch | view | raw | blame | history
data/labels/traffic light.png patch | view | raw | blame | history
data/labels/train.png patch | view | raw | blame | history
data/labels/truck.png patch | view | raw | blame | history
data/labels/tv.png patch | view | raw | blame | history
data/labels/tvmonitor.png patch | view | raw | blame | history
data/labels/umbrella.png patch | view | raw | blame | history
data/labels/vase.png patch | view | raw | blame | history
data/labels/wine glass.png patch | view | raw | blame | history
data/labels/zebra.png patch | view | raw | blame | history
src/coco.c 3 ●●●● patch | view | raw | blame | history
src/connected_layer.c 4 ●●● patch | view | raw | blame | history
src/convolutional_layer.c 3 ●●●●● patch | view | raw | blame | history
src/darknet.c 73 ●●●● patch | view | raw | blame | history
src/data.c 10 ●●●● patch | view | raw | blame | history
src/demo.c 2 ●●● patch | view | raw | blame | history
src/detection_layer.h 4 ●●●● patch | view | raw | blame | history
src/image.c 7 ●●●● patch | view | raw | blame | history
src/layer.h 1 ●●●● patch | view | raw | blame | history
src/network.c 13 ●●●●● patch | view | raw | blame | history
src/network_kernels.cu 7 ●●●● patch | view | raw | blame | history
src/parser.c 28 ●●●●● patch | view | raw | blame | history
Makefile
@@ -1,6 +1,6 @@
GPU=0
CUDNN=0
OPENCV=0
GPU=1
CUDNN=1
OPENCV=1
DEBUG=0
ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
@@ -41,7 +41,7 @@
LDFLAGS+= -lcudnn
endif
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o
ifeq ($(GPU), 1) 
LDFLAGS+= -lstdc++ 
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
data/labels/aeroplane.png

data/labels/airplane.png

data/labels/apple.png

data/labels/backpack.png

data/labels/banana.png

data/labels/baseball bat.png

data/labels/baseball glove.png

data/labels/bear.png

data/labels/bed.png

data/labels/bench.png

data/labels/bicycle.png

data/labels/bird.png

data/labels/boat.png

data/labels/book.png

data/labels/bottle.png

data/labels/bowl.png

data/labels/broccoli.png

data/labels/bus.png

data/labels/cake.png

data/labels/car.png

data/labels/carrot.png

data/labels/cat.png

data/labels/cell phone.png

data/labels/chair.png

data/labels/clock.png

data/labels/couch.png

data/labels/cow.png

data/labels/cup.png

data/labels/dining table.png

data/labels/diningtable.png

data/labels/dog.png

data/labels/donut.png

data/labels/elephant.png

data/labels/fire hydrant.png

data/labels/fork.png

data/labels/frisbee.png

data/labels/giraffe.png

data/labels/hair drier.png

data/labels/handbag.png

data/labels/horse.png

data/labels/hot dog.png

data/labels/keyboard.png

data/labels/kite.png

data/labels/knife.png

data/labels/laptop.png

data/labels/microwave.png

data/labels/motorbike.png

data/labels/motorcycle.png

data/labels/mouse.png

data/labels/orange.png

data/labels/oven.png

data/labels/parking meter.png

data/labels/person.png

data/labels/pizza.png

data/labels/potted plant.png

data/labels/pottedplant.png

data/labels/refrigerator.png

data/labels/remote.png

data/labels/sandwich.png

data/labels/scissors.png

data/labels/sheep.png

data/labels/sink.png

data/labels/skateboard.png

data/labels/skis.png

data/labels/snowboard.png

data/labels/sofa.png

data/labels/spoon.png

data/labels/sports ball.png

data/labels/stop sign.png

data/labels/suitcase.png

data/labels/surfboard.png

data/labels/teddy bear.png

data/labels/tennis racket.png

data/labels/tie.png

data/labels/toaster.png

data/labels/toilet.png

data/labels/toothbrush.png

data/labels/traffic light.png

data/labels/train.png

data/labels/truck.png

data/labels/tv.png

data/labels/tvmonitor.png

data/labels/umbrella.png

data/labels/vase.png

data/labels/wine glass.png

data/labels/zebra.png

src/coco.c
@@ -348,9 +348,8 @@
        convert_detections(predictions, l.classes, l.n, l.sqrt, l.side, 1, 1, thresh, probs, boxes, 0);
        if (nms) do_nms_sort(boxes, probs, l.side*l.side*l.n, l.classes, nms);
        draw_detections(im, l.side*l.side*l.n, thresh, boxes, probs, coco_classes, coco_labels, 80);
        save_image(im, "prediction");
        show_image(im, "predictions");
        show_image(sized, "resized");
        free_image(im);
        free_image(sized);
#ifdef OPENCV
src/connected_layer.c
@@ -192,6 +192,9 @@
            l.weights[i*l.inputs + j] *= scale;
        }
        l.biases[i] -= l.rolling_mean[i] * scale;
        l.scales[i] = 1;
        l.rolling_mean[i] = 0;
        l.rolling_variance[i] = 1;
    }
}
@@ -257,7 +260,6 @@
        axpy_ongpu(l.outputs, 1, l.biases_gpu, 1, l.output_gpu + i*l.outputs, 1);
    }
    activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);
}
void backward_connected_layer_gpu(connected_layer l, network_state state)
src/convolutional_layer.c
@@ -301,6 +301,9 @@
            l.filters[i*l.c*l.size*l.size + j] *= scale;
        }
        l.biases[i] -= l.rolling_mean[i] * scale;
        l.scales[i] = 1;
        l.rolling_mean[i] = 0;
        l.rolling_variance[i] = 1;
    }
}
src/darknet.c
@@ -14,6 +14,7 @@
extern void run_imagenet(int argc, char **argv);
extern void run_yolo(int argc, char **argv);
extern void run_detector(int argc, char **argv);
extern void run_coco(int argc, char **argv);
extern void run_writing(int argc, char **argv);
extern void run_captcha(int argc, char **argv);
@@ -97,12 +98,13 @@
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.type == CONVOLUTIONAL){
            ops += 2 * l.n * l.size*l.size*l.c * l.out_h*l.out_w;
            ops += 2l * l.n * l.size*l.size*l.c * l.out_h*l.out_w;
        } else if(l.type == CONNECTED){
            ops += 2 * l.inputs * l.outputs;
            ops += 2l * l.inputs * l.outputs;
        }
    }
    printf("Floating Point Operations: %ld\n", ops);
    printf("Floating Point Operations: %.2f Bn\n", (float)ops/1000000000.);
}
void partial(char *cfgfile, char *weightfile, char *outfile, int max)
@@ -164,6 +166,47 @@
    save_weights(net, outfile);
}
void reset_normalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
    network net = parse_network_cfg(cfgfile);
    if (weightfile) {
        load_weights(&net, weightfile);
    }
    int i;
    for (i = 0; i < net.n; ++i) {
        layer l = net.layers[i];
        if (l.type == CONVOLUTIONAL && l.batch_normalize) {
            denormalize_convolutional_layer(l);
        }
        if (l.type == CONNECTED && l.batch_normalize) {
            denormalize_connected_layer(l);
        }
        if (l.type == GRU && l.batch_normalize) {
            denormalize_connected_layer(*l.input_z_layer);
            denormalize_connected_layer(*l.input_r_layer);
            denormalize_connected_layer(*l.input_h_layer);
            denormalize_connected_layer(*l.state_z_layer);
            denormalize_connected_layer(*l.state_r_layer);
            denormalize_connected_layer(*l.state_h_layer);
        }
    }
    save_weights(net, outfile);
}
/*
 * Return a copy of `l` with batch normalization switched on and freshly
 * allocated per-output statistics buffers of length `n`:
 *   - scales:           every entry set to 1
 *   - rolling_mean:     zero-filled
 *   - rolling_variance: zero-filled
 *
 * Any buffers previously referenced by those three fields are not freed
 * here. Allocation results are not checked.
 */
layer normalize_layer(layer l, int n)
{
    int k;

    l.batch_normalize = 1;

    /* calloc zero-fills, so the scales must be raised to 1 explicitly. */
    l.scales = calloc(n, sizeof(float));
    for (k = 0; k < n; ++k) {
        l.scales[k] = 1;
    }

    l.rolling_mean = calloc(n, sizeof(float));
    l.rolling_variance = calloc(n, sizeof(float));

    return l;
}
void normalize_net(char *cfgfile, char *weightfile, char *outfile)
{
    gpu_index = -1;
@@ -171,17 +214,23 @@
    if(weightfile){
        load_weights(&net, weightfile);
    }
    int i, j;
    int i;
    for(i = 0; i < net.n; ++i){
        layer l = net.layers[i];
        if(l.type == CONVOLUTIONAL){
            net.layers[i].batch_normalize=1;
            net.layers[i].scales = calloc(l.n, sizeof(float));
            for(j = 0; j < l.n; ++j){
                net.layers[i].scales[i] = 1;
        if(l.type == CONVOLUTIONAL && !l.batch_normalize){
            net.layers[i] = normalize_layer(l, l.n);
            }
            net.layers[i].rolling_mean = calloc(l.n, sizeof(float));
            net.layers[i].rolling_variance = calloc(l.n, sizeof(float));
        if (l.type == CONNECTED && !l.batch_normalize) {
            net.layers[i] = normalize_layer(l, l.outputs);
        }
        if (l.type == GRU && l.batch_normalize) {
            *l.input_z_layer = normalize_layer(*l.input_z_layer, l.input_z_layer->outputs);
            *l.input_r_layer = normalize_layer(*l.input_r_layer, l.input_r_layer->outputs);
            *l.input_h_layer = normalize_layer(*l.input_h_layer, l.input_h_layer->outputs);
            *l.state_z_layer = normalize_layer(*l.state_z_layer, l.state_z_layer->outputs);
            *l.state_r_layer = normalize_layer(*l.state_r_layer, l.state_r_layer->outputs);
            *l.state_h_layer = normalize_layer(*l.state_h_layer, l.state_h_layer->outputs);
            net.layers[i].batch_normalize=1;
        }
    }
    save_weights(net, outfile);
@@ -265,6 +314,8 @@
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "yolo")){
        run_yolo(argc, argv);
    } else if (0 == strcmp(argv[1], "detector")){
        run_detector(argc, argv);
    } else if (0 == strcmp(argv[1], "cifar")){
        run_cifar(argc, argv);
    } else if (0 == strcmp(argv[1], "go")){
@@ -299,6 +350,8 @@
        change_rate(argv[2], atof(argv[3]), (argc > 4) ? atof(argv[4]) : 0);
    } else if (0 == strcmp(argv[1], "rgbgr")){
        rgbgr_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "reset")){
        reset_normalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "denormalize")){
        denormalize_net(argv[2], argv[3], argv[4]);
    } else if (0 == strcmp(argv[1], "normalize")){
src/data.c
@@ -297,11 +297,11 @@
        if (w < .01 || h < .01) continue;
        truth[i*5+0] = id;
        truth[i*5+1] = x;
        truth[i*5+2] = y;
        truth[i*5+3] = w;
        truth[i*5+4] = h;
        truth[i*5+0] = x;
        truth[i*5+1] = y;
        truth[i*5+2] = w;
        truth[i*5+3] = h;
        truth[i*5+4] = id;
    }
    free(boxes);
}
src/demo.c
@@ -8,7 +8,7 @@
#include "demo.h"
#include <sys/time.h>
#define FRAMES 3
#define FRAMES 1
#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
src/detection_layer.h
@@ -1,5 +1,5 @@
#ifndef REGION_LAYER_H
#define REGION_LAYER_H
#ifndef DETECTION_LAYER_H
#define DETECTION_LAYER_H
#include "layer.h"
#include "network.h"
src/image.c
@@ -109,14 +109,17 @@
        int class = max_index(probs[i], classes);
        float prob = probs[i][class];
        if(prob > thresh){
            int width = pow(prob, 1./2.)*10+1;
            width = 8;
            //int width = pow(prob, 1./2.)*30+1;
            int width = 8;
            printf("%s: %.0f%%\n", names[class], prob*100);
            int offset = class*1 % classes;
            float red = get_color(2,offset,classes);
            float green = get_color(1,offset,classes);
            float blue = get_color(0,offset,classes);
            float rgb[3];
            //width = prob*20+2;
            rgb[0] = red;
            rgb[1] = green;
            rgb[2] = blue;
src/layer.h
@@ -29,6 +29,7 @@
    BATCHNORM,
    NETWORK,
    XNOR,
    REGION,
    BLANK
} LAYER_TYPE;
src/network.c
@@ -16,6 +16,7 @@
#include "activation_layer.h"
#include "deconvolutional_layer.h"
#include "detection_layer.h"
#include "region_layer.h"
#include "normalization_layer.h"
#include "batchnorm_layer.h"
#include "maxpool_layer.h"
@@ -103,6 +104,8 @@
            return "softmax";
        case DETECTION:
            return "detection";
        case REGION:
            return "region";
        case DROPOUT:
            return "dropout";
        case CROP:
@@ -160,6 +163,8 @@
            forward_batchnorm_layer(l, state);
        } else if(l.type == DETECTION){
            forward_detection_layer(l, state);
        } else if(l.type == REGION){
            forward_region_layer(l, state);
        } else if(l.type == CONNECTED){
            forward_connected_layer(l, state);
        } else if(l.type == RNN){
@@ -230,11 +235,7 @@
    float sum = 0;
    int count = 0;
    for(i = 0; i < net.n; ++i){
        if(net.layers[i].type == COST){
            sum += net.layers[i].cost[0];
            ++count;
        }
        if(net.layers[i].type == DETECTION){
        if(net.layers[i].cost){
            sum += net.layers[i].cost[0];
            ++count;
        }
@@ -284,6 +285,8 @@
            backward_dropout_layer(l, state);
        } else if(l.type == DETECTION){
            backward_detection_layer(l, state);
        } else if(l.type == REGION){
            backward_region_layer(l, state);
        } else if(l.type == SOFTMAX){
            if(i != 0) backward_softmax_layer(l, state);
        } else if(l.type == CONNECTED){
src/network_kernels.cu
@@ -19,6 +19,7 @@
#include "gru_layer.h"
#include "crnn_layer.h"
#include "detection_layer.h"
#include "region_layer.h"
#include "convolutional_layer.h"
#include "activation_layer.h"
#include "deconvolutional_layer.h"
@@ -59,6 +60,8 @@
            forward_local_layer_gpu(l, state);
        } else if(l.type == DETECTION){
            forward_detection_layer_gpu(l, state);
        } else if(l.type == REGION){
            forward_region_layer_gpu(l, state);
        } else if(l.type == CONNECTED){
            forward_connected_layer_gpu(l, state);
        } else if(l.type == RNN){
@@ -125,6 +128,8 @@
            backward_dropout_layer_gpu(l, state);
        } else if(l.type == DETECTION){
            backward_detection_layer_gpu(l, state);
        } else if(l.type == REGION){
            backward_region_layer_gpu(l, state);
        } else if(l.type == NORMALIZATION){
            backward_normalization_layer_gpu(l, state);
        } else if(l.type == BATCHNORM){
@@ -181,7 +186,7 @@
    state.net = net;
    int x_size = get_network_input_size(net)*net.batch;
    int y_size = get_network_output_size(net)*net.batch;
    if(net.layers[net.n-1].type == DETECTION) y_size = net.layers[net.n-1].truths*net.batch;
    if(net.layers[net.n-1].truths) y_size = net.layers[net.n-1].truths*net.batch;
    if(!*net.input_gpu){
        *net.input_gpu = cuda_make_array(x, x_size);
        *net.truth_gpu = cuda_make_array(y, y_size);
src/parser.c
@@ -19,6 +19,7 @@
#include "softmax_layer.h"
#include "dropout_layer.h"
#include "detection_layer.h"
#include "region_layer.h"
#include "avgpool_layer.h"
#include "local_layer.h"
#include "route_layer.h"
@@ -51,6 +52,7 @@
int is_shortcut(section *s);
int is_cost(section *s);
int is_detection(section *s);
int is_region(section *s);
int is_route(section *s);
list *read_cfg(char *filename);
@@ -245,6 +247,25 @@
    return layer;
}
/*
 * Build a region layer from a cfg section.
 *
 * `options` is the key/value list of the section; `params` supplies the
 * batch size and input dimensions. The last argument of each
 * option_find_* call is presumably the fallback used when the key is
 * absent (TODO confirm against option_list.c).
 *
 * The lookups are kept in their original order on purpose: the helpers
 * may report on each key as it is queried.
 */
layer parse_region(list *options, size_params params)
{
    int n_coords  = option_find_int(options, "coords", 4);
    int n_classes = option_find_int(options, "classes", 20);
    int n_boxes   = option_find_int(options, "num", 1);

    layer region = make_region_layer(params.batch, params.w, params.h,
                                     n_boxes, n_classes, n_coords);
    /* The layer's output count has to match the input count it is given. */
    assert(region.outputs == params.inputs);

    region.softmax   = option_find_int(options, "softmax", 0);
    region.max_boxes = option_find_int_quiet(options, "max",30);
    region.jitter    = option_find_float(options, "jitter", .2);
    region.rescore   = option_find_int_quiet(options, "rescore",0);

    /* Weights applied to the individual terms of the layer's cost. */
    region.coord_scale    = option_find_float(options, "coord_scale", 1);
    region.object_scale   = option_find_float(options, "object_scale", 1);
    region.noobject_scale = option_find_float(options, "noobject_scale", 1);
    region.class_scale    = option_find_float(options, "class_scale", 1);

    return region;
}
detection_layer parse_detection(list *options, size_params params)
{
    int coords = option_find_int(options, "coords", 1);
@@ -557,6 +578,8 @@
            l = parse_crop(options, params);
        }else if(is_cost(s)){
            l = parse_cost(options, params);
        }else if(is_region(s)){
            l = parse_region(options, params);
        }else if(is_detection(s)){
            l = parse_detection(options, params);
        }else if(is_softmax(s)){
@@ -620,6 +643,7 @@
    if (strcmp(type, "[crop]")==0) return CROP;
    if (strcmp(type, "[cost]")==0) return COST;
    if (strcmp(type, "[detection]")==0) return DETECTION;
    if (strcmp(type, "[region]")==0) return REGION;
    if (strcmp(type, "[local]")==0) return LOCAL;
    if (strcmp(type, "[deconv]")==0
            || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL;
@@ -659,6 +683,10 @@
{
    return (strcmp(s->type, "[cost]")==0);
}
/* Return 1 when the section header is exactly "[region]", otherwise 0. */
int is_region(section *s)
{
    return !strcmp(s->type, "[region]");
}
int is_detection(section *s)
{
    return (strcmp(s->type, "[detection]")==0);