~speedprog/mtg/mtg_card_detector.git

parent: aebe9377 | patch | commit | ignore whitespace

Joseph Redmon

2016-09-01 8f1b4e0962857d402f9d017fcbf387ef0eceb7c4

updates and things

38 files modified

4 files added

3 files deleted

	.gitignore	1 ●●●●● patch \| view \| raw \| blame \| history
	Makefile	8 ●●●●● patch \| view \| raw \| blame \| history
	cfg/extraction22k.cfg	209 ●●●●● patch \| view \| raw \| blame \| history
	cfg/go.test.cfg	126 ●●●●● patch \| view \| raw \| blame \| history
	cfg/imagenet1k.dataset	11 ●●●●● patch \| view \| raw \| blame \| history
	cfg/yolo.cfg	16 ●●●●● patch \| view \| raw \| blame \| history
	cfg/yolo.train.cfg	257 ●●●●● patch \| view \| raw \| blame \| history
	data/dog.jpg	patch \| view \| raw \| blame \| history
	data/imagenet.labels.list	21842 ●●●●● patch \| view \| raw \| blame \| history
	data/imagenet.shortnames.list	21842 ●●●●● patch \| view \| raw \| blame \| history
	data/inet.labels.list	1000 ●●●●● patch \| view \| raw \| blame \| history
	data/shortnames.txt	1000 ●●●●● patch \| view \| raw \| blame \| history
	src/blas.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/blas_kernels.cu	15 ●●●●● patch \| view \| raw \| blame \| history
	src/classifier.c	222 ●●●●● patch \| view \| raw \| blame \| history
	src/coco.c	11 ●●●●● patch \| view \| raw \| blame \| history
	src/col2im.c	10 ●●●●● patch \| view \| raw \| blame \| history
	src/col2im_kernels.cu	1 ●●●●● patch \| view \| raw \| blame \| history
	src/convolutional_kernels.cu	3 ●●●●● patch \| view \| raw \| blame \| history
	src/convolutional_layer.c	17 ●●●●● patch \| view \| raw \| blame \| history
	src/cost_layer.c	19 ●●●●● patch \| view \| raw \| blame \| history
	src/darknet.c	5 ●●●●● patch \| view \| raw \| blame \| history
	src/data.c	37 ●●●●● patch \| view \| raw \| blame \| history
	src/data.h	10 ●●●●● patch \| view \| raw \| blame \| history
	src/demo.c	24 ●●●●● patch \| view \| raw \| blame \| history
	src/detection_layer.c	4 ●●●●● patch \| view \| raw \| blame \| history
	src/detector.c	5 ●●●●● patch \| view \| raw \| blame \| history
	src/im2col.c	10 ●●●●● patch \| view \| raw \| blame \| history
	src/im2col_kernels.cu	95 ●●●●● patch \| view \| raw \| blame \| history
	src/image.c	491 ●●●●● patch \| view \| raw \| blame \| history
	src/image.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/imagenet.c	237 ●●●●● patch \| view \| raw \| blame \| history
	src/layer.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/maxpool_layer.c	24 ●●●●● patch \| view \| raw \| blame \| history
	src/maxpool_layer.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/maxpool_layer_kernels.cu	28 ●●●●● patch \| view \| raw \| blame \| history
	src/network.c	2 ●●●●● patch \| view \| raw \| blame \| history
	src/network.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/parser.c	21 ●●●●● patch \| view \| raw \| blame \| history
	src/region_layer.c	6 ●●●●● patch \| view \| raw \| blame \| history
	src/tag.c	5 ●●●●● patch \| view \| raw \| blame \| history
	src/utils.c	7 ●●●●● patch \| view \| raw \| blame \| history
	src/utils.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/voxel.c	56 ●●●●● patch \| view \| raw \| blame \| history
	src/yolo.c	5 ●●●●● patch \| view \| raw \| blame \| history

 .gitignore

@@ -14,6 +14,7 @@
submission/
cfg/
darknet
.fuse*

# OS Generated #
.DS_Store*

 Makefile

@@ -1,6 +1,6 @@
GPU=1
CUDNN=1
OPENCV=1
GPU=0
CUDNN=0
OPENCV=0
DEBUG=0

ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
@@ -41,7 +41,7 @@
LDFLAGS+= -lcudnn
endif

OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
ifeq ($(GPU), 1) 
LDFLAGS+= -lstdc++ 
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o

 cfg/extraction22k.cfg

New file
@@ -0,0 +1,209 @@
[net]
batch=128
subdivisions=1
height=224
width=224
max_crop=320
channels=3
momentum=0.9
decay=0.0005

learning_rate=0.01
max_batches = 0
policy=steps
steps=444000,590000,970000
scales=.5,.2,.1

#policy=sigmoid
#gamma=.00008
#step=100000
#max_batches=200000

[convolutional]
batch_normalize=1
filters=64
size=7
stride=2
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=192
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=2048
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=2048
size=3
stride=1
pad=1
activation=leaky

[avgpool]

[connected]
output=21842
activation=leaky

[softmax]
groups=1

[cost]
type=sse


 cfg/go.test.cfg

@@ -3,102 +3,126 @@
subdivisions=1
height=19
width=19
channels=8
channels=1
momentum=0.9
decay=0.0005

learning_rate=0.1
max_batches = 0
policy=steps
steps=50000
scales=.1
policy=poly
power=4
max_batches=400000

[convolutional]
filters=512
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky
batch_normalize=1

[convolutional]
filters=512
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky
batch_normalize=1

[convolutional]
filters=512
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky
batch_normalize=1

[convolutional]
filters=512
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1

[convolutional]
filters=256
size=1
stride=1
pad=1
activation=leaky
batch_normalize=1

[convolutional]
filters=512
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1

[convolutional]
filters=256
size=1
filters=192
size=3
stride=1
pad=1
activation=leaky
activation=relu
batch_normalize=1

[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1

[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1

[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1

[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1

[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1

[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1

[convolutional]
filters=192
size=3
stride=1
pad=1
activation=relu
batch_normalize=1


[convolutional]
filters=1
size=1
stride=1
pad=1
activation=leaky
activation=linear

[softmax]


 cfg/imagenet1k.dataset

@@ -1,9 +1,8 @@
classes=1000
labels = data/inet.labels.list
names = data/shortnames.txt
train = /data/imagenet/imagenet1k.train.list
valid = /data/imagenet/imagenet1k.valid.list
top=5
test = /Users/pjreddie/Documents/sites/selfie/paths.list
train  = /data/imagenet/imagenet1k.train.list
valid  = /data/imagenet/imagenet1k.valid.list
backup = /home/pjreddie/backup/
labels = data/imagenet.labels.list
names  = data/imagenet.shortnames.list
top=5


 cfg/yolo.cfg

@@ -1,11 +1,14 @@
[net]
batch=64
subdivisions=2
batch=1
subdivisions=1
height=448
width=448
channels=3
momentum=0.9
decay=0.0005
saturation=1.5
exposure=1.5
hue=.1

learning_rate=0.0005
policy=steps
@@ -13,15 +16,6 @@
scales=2.5,2,2,.1,.1
max_batches = 40000

[crop]
crop_width=448
crop_height=448
flip=0
angle=0
saturation = 1.5
exposure = 1.5
noadjust=1

[convolutional]
batch_normalize=1
filters=64

 cfg/yolo.train.cfg

New file
@@ -0,0 +1,257 @@
[net]
batch=64
subdivisions=4
height=448
width=448
channels=3
momentum=0.9
decay=0.0005
saturation=1.5
exposure=1.5
hue=.1

learning_rate=0.0005
policy=steps
steps=200,400,600,20000,30000
scales=2.5,2,2,.1,.1
max_batches = 40000

[convolutional]
batch_normalize=1
filters=64
size=7
stride=2
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=192
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[maxpool]
size=2
stride=2

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky

#######

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=2
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky

[local]
size=3
stride=1
pad=1
filters=256
activation=leaky

[dropout]
probability=.5

[connected]
output= 1715
activation=linear

[detection]
classes=20
coords=4
rescore=1
side=7
num=3
softmax=0
sqrt=1
jitter=.2

object_scale=1
noobject_scale=.5
class_scale=1
coord_scale=5


 data/dog.jpg



 data/imagenet.labels.list

New file
Diff too large

 data/imagenet.shortnames.list

New file
Diff too large

 data/inet.labels.list

File was deleted

 data/shortnames.txt

File was deleted

 src/blas.h

@@ -40,6 +40,7 @@
void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
void scal_ongpu(int N, float ALPHA, float * X, int INCX);
void supp_ongpu(int N, float ALPHA, float * X, int INCX);
void mask_ongpu(int N, float * X, float mask_num, float * mask);
void const_ongpu(int N, float ALPHA, float *X, int INCX);
void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);

 src/blas_kernels.cu

@@ -368,6 +368,14 @@
    if(i < N) X[i*INCX] = min(ALPHA, max(-ALPHA, X[i*INCX]));
}

/*
 * Suppress small values: every strided element of X whose square is below
 * ALPHA squared is overwritten with 0. Each thread handles the single element
 * selected by its flattened block/thread index; indices >= N do nothing.
 */
__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
{
    const int idx = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if(idx >= N) return;

    float *elem = X + idx*INCX;
    if((*elem * *elem) < (ALPHA * ALPHA)) *elem = 0;
}

__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@@ -552,6 +560,12 @@
    check_error(cudaPeekAtLastError());
}

/*
 * Host-side entry point for supp_kernel: launches it over N elements of X
 * (stride INCX) with threshold ALPHA, using cuda_gridsize(N) blocks of BLOCK
 * threads, then passes the result of cudaPeekAtLastError() to check_error.
 */
extern "C" void supp_ongpu(int N, float ALPHA, float * X, int INCX)
{
    supp_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX)
{
    fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
@@ -633,6 +647,7 @@
}



__global__ void weighted_sum_kernel(int n, float *a, float *b, float *s, float *c)
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;

 src/classifier.c

@@ -39,6 +39,18 @@
    return options;
}

/*
 * Split each "name value" label in place and collect the numeric values.
 *
 * For every labels[i] the first space is overwritten with '\0', truncating the
 * string to the text before it; the text after the space is parsed with atof()
 * into the result array. A label that contains no space is left untouched and
 * its value stays 0 (this case used to dereference a NULL pointer).
 *
 * Returns a calloc'd array of n floats owned by the caller, or NULL when the
 * allocation fails.
 */
float *get_regression_values(char **labels, int n)
{
    float *v = calloc(n, sizeof(float));
    if(!v) return 0;
    int i;
    for(i = 0; i < n; ++i){
        char *p = strchr(labels[i], ' ');
        if(!p) continue;    /* no separator: keep label intact, value stays 0 */
        *p = 0;
        v[i] = atof(p+1);
    }
    return v;
}

void train_classifier(char *datacfg, char *cfgfile, char *weightfile, int clear)
{
    int nthreads = 8;
@@ -85,6 +97,7 @@
    args.angle = net.angle;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;
    args.size = net.w;

    args.paths = paths;
@@ -116,6 +129,7 @@
        printf("Loaded: %lf seconds\n", sec(clock()-time));
        time=clock();

        #ifdef OPENCV
        if(0){
            int u;
            for(u = 0; u < imgs; ++u){
@@ -124,6 +138,7 @@
                cvWaitKey(0);
            }
        }
        #endif

        float loss = train_network(net, train);
        if(avg_loss == -1) avg_loss = loss;
@@ -440,7 +455,7 @@

    char **labels = get_labels(label_list);
    list *plist = get_paths(valid_list);
    int scales[] = {192, 224, 288, 320, 352};
    int scales[] = {224, 288, 320, 352, 384};
    int nscales = sizeof(scales)/sizeof(scales[0]);

    char **paths = (char **)list_to_array(plist);
@@ -484,6 +499,88 @@
    }
}

void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num)
{
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    srand(2222222);

    list *options = read_data_cfg(datacfg);

    char *name_list = option_find_str(options, "names", 0);
    if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
    int top = option_find_int(options, "top", 1);

    int i = 0;
    char **names = get_labels(name_list);
    clock_t time;
    int *indexes = calloc(top, sizeof(int));
    char buff[256];
    char *input = buff;
    while(1){
        if(filename){
            strncpy(input, filename, 256);
        }else{
            printf("Enter Image Path: ");
            fflush(stdout);
            input = fgets(input, 256, stdin);
            if(!input) return;
            strtok(input, "\n");
        }
        image orig = load_image_color(input, 0, 0);
        image r = resize_min(orig, 256);
        image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224);
        float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742};
        float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583};
        float var[3];
        var[0] = std[0]*std[0];
        var[1] = std[1]*std[1];
        var[2] = std[2]*std[2];

        normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h);

        float *X = im.data;
        time=clock();
        float *predictions = network_predict(net, X);
        
        layer l = net.layers[layer_num];
        for(i = 0; i < l.c; ++i){
        if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]);
        }
        #ifdef GPU
        cuda_pull_array(l.output_gpu, l.output, l.outputs);
        #endif
        for(i = 0; i < l.outputs; ++i){
            printf("%f\n", l.output[i]);
        }
        /*
        
        printf("\n\nWeights\n");
        for(i = 0; i < l.n*l.size*l.size*l.c; ++i){
            printf("%f\n", l.filters[i]);
        }

        printf("\n\nBiases\n");
        for(i = 0; i < l.n; ++i){
            printf("%f\n", l.biases[i]);
        }
        */

        top_predictions(net, top, indexes);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        for(i = 0; i < top; ++i){
            int index = indexes[i];
            printf("%s: %f\n", names[index], predictions[index]);
        }
        free_image(im);
        if (filename) break;
    }
}


void predict_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename)
{
    network net = parse_network_cfg(cfgfile);
@@ -649,6 +746,127 @@
}


/*
 * Video "threat meter" demo (compiled to an empty function without OPENCV).
 *
 * Reads frames from `filename` (or camera `cam_index` when filename is NULL),
 * runs the classifier on each frame resized to net.w x net.h, and keeps a
 * rolling score:
 *     curr   = predictions[0]*0 + predictions[1]*.6 + predictions[2]
 *     threat = roll*curr + (1-roll)*threat          (roll = .2)
 * A vertical gauge filled proportionally to `threat`, plus two marker boxes
 * that are filled when threat exceeds .97 and .57, is drawn onto the frame,
 * which is then saved as /home/pjreddie/tmp/threat_<count>. The smoothed FPS
 * and the `top` predictions are printed to the terminal for every frame.
 *
 *  datacfg    - data config; "top" and "names" (falling back to "labels") are read from it
 *  cfgfile    - network definition
 *  weightfile - weights to load; skipped when NULL
 */
void threat_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
{
#ifdef OPENCV
    float threat = 0;
    float roll = .2;

    printf("Classifier Demo\n");
    network net = parse_network_cfg(cfgfile);
    if(weightfile){
        load_weights(&net, weightfile);
    }
    set_batch_network(&net, 1);
    list *options = read_data_cfg(datacfg);

    srand(2222222);
    CvCapture * cap;

    if(filename){
        cap = cvCaptureFromFile(filename);
    }else{
        cap = cvCaptureFromCAM(cam_index);
    }

    int top = option_find_int(options, "top", 1);

    /* Same lookup order as try_classifier: "names" first, then "labels". */
    char *name_list = option_find_str(options, "names", 0);
    if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
    char **names = get_labels(name_list);

    int *indexes = calloc(top, sizeof(int));

    if(!cap) error("Couldn't connect to webcam.\n");
    //cvNamedWindow("Threat", CV_WINDOW_NORMAL);
    //cvResizeWindow("Threat", 512, 512);
    float fps = 0;
    int i;

    int count = 0;

    while(1){
        ++count;
        struct timeval tval_before, tval_after, tval_result;
        gettimeofday(&tval_before, NULL);

        image in = get_image_from_stream(cap);
        if(!in.data) break;
        image in_s = resize_image(in, net.w, net.h);

        /* `out` is a struct copy of `in`; the overlay is drawn through it. */
        image out = in;
        int x1 = out.w / 20;
        int y1 = out.h / 20;
        int x2 = 2*x1;
        int y2 = out.h - out.h/20;

        int border = .01*out.h;
        int h = y2 - y1 - 2*border;
        int w = x2 - x1 - 2*border;

        float *predictions = network_predict(net, in_s.data);
        float curr_threat = predictions[0] * 0 + predictions[1] * .6 + predictions[2];
        threat = roll * curr_threat + (1-roll) * threat;

        /* Upper marker: tick line, fill when threat > .97, then outline. */
        draw_box_width(out, x2 + border, y1 + .02*h, x2 + .5 * w, y1 + .02*h + border, border, 0,0,0);
        if(threat > .97) {
            draw_box_width(out,  x2 + .5 * w + border,
                    y1 + .02*h - 2*border,
                    x2 + .5 * w + 6*border,
                    y1 + .02*h + 3*border, 3*border, 1,0,0);
        }
        draw_box_width(out,  x2 + .5 * w + border,
                y1 + .02*h - 2*border,
                x2 + .5 * w + 6*border,
                y1 + .02*h + 3*border, .5*border, 0,0,0);

        /* Lower marker: tick line, fill when threat > .57, then outline. */
        draw_box_width(out, x2 + border, y1 + .42*h, x2 + .5 * w, y1 + .42*h + border, border, 0,0,0);
        if(threat > .57) {
            draw_box_width(out,  x2 + .5 * w + border,
                    y1 + .42*h - 2*border,
                    x2 + .5 * w + 6*border,
                    y1 + .42*h + 3*border, 3*border, 1,1,0);
        }
        draw_box_width(out,  x2 + .5 * w + border,
                y1 + .42*h - 2*border,
                x2 + .5 * w + 6*border,
                y1 + .42*h + 3*border, .5*border, 0,0,0);

        /* Gauge frame, then one 1-pixel row per unit of threat*h from the bottom up. */
        draw_box_width(out, x1, y1, x2, y2, border, 0,0,0);
        for(i = 0; i < threat * h ; ++i){
            float ratio = (float) i / h;
            float r = (ratio < .5) ? (2*(ratio)) : 1;
            float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5);
            draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0);
        }
        top_predictions(net, top, indexes);
        char buff[256];
        snprintf(buff, sizeof(buff), "/home/pjreddie/tmp/threat_%06d", count);
        save_image(out, buff);

        printf("\033[2J");
        printf("\033[1;1H");
        printf("\nFPS:%.0f\n",fps);

        for(i = 0; i < top; ++i){
            int index = indexes[i];
            printf("%.1f%%: %s\n", predictions[index]*100, names[index]);
        }

        if(0){
            show_image(out, "Threat");
            cvWaitKey(10);
        }
        free_image(in_s);
        free_image(in);

        gettimeofday(&tval_after, NULL);
        timersub(&tval_after, &tval_before, &tval_result);
        /* Use the full elapsed time (seconds + microseconds); the microsecond
         * field alone wraps every second and can be zero. */
        double elapsed = (double)tval_result.tv_sec + (double)tval_result.tv_usec / 1000000.;
        if(elapsed > 0){
            float curr = 1./elapsed;
            fps = .9*fps + .1*curr;
        }
    }
    free(indexes);
#endif
}


void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
{
#ifdef OPENCV
@@ -732,8 +950,10 @@
    char *layer_s = (argc > 7) ? argv[7]: 0;
    int layer = layer_s ? atoi(layer_s) : -1;
    if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename);
    else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s));
    else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, clear);
    else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
    else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
    else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
    else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
    else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights);

 src/coco.c

@@ -25,6 +25,7 @@
    //char *train_images = "/home/pjreddie/data/voc/test/train.txt";
    //char *train_images = "/home/pjreddie/data/coco/train.txt";
    char *train_images = "data/coco.trainval.txt";
    //char *train_images = "data/bags.train.list";
    char *backup_directory = "/home/pjreddie/backup/";
    srand(time(0));
    data_seed = time(0);
@@ -63,6 +64,11 @@
    args.d = &buffer;
    args.type = REGION_DATA;

    args.angle = net.angle;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;
    //while(i*imgs < N*120){
@@ -94,6 +100,11 @@
            sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
            save_weights(net, buff);
        }
        if(i%100==0){
            char buff[256];
            sprintf(buff, "%s/%s.backup", backup_directory, base);
            save_weights(net, buff);
        }
        free_data(train);
    }
    char buff[256];

 src/col2im.c

@@ -16,13 +16,9 @@
         int ksize,  int stride, int pad, float* data_im) 
{
    int c,h,w;
    int height_col = (height - ksize) / stride + 1;
    int width_col = (width - ksize) / stride + 1;
    if (pad){
        height_col = 1 + (height-1) / stride;
        width_col = 1 + (width-1) / stride;
        pad = ksize/2;
    }
    int height_col = (height + 2*pad - ksize) / stride + 1;
    int width_col = (width + 2*pad - ksize) / stride + 1;

    int channels_col = channels * ksize * ksize;
    for (c = 0; c < channels_col; ++c) {
        int w_offset = c % ksize;

 src/col2im_kernels.cu

@@ -46,7 +46,6 @@
        int ksize, int stride, int pad, float *data_im){
    // We are going to launch channels * height_col * width_col kernels, each
    // kernel responsible for copying a single-channel grid.
    pad = pad ? ksize/2 : 0;
    int height_col = (height + 2 * pad - ksize) / stride + 1;
    int width_col = (width + 2 * pad - ksize) / stride + 1;
    int num_kernels = channels * height * width;

 src/convolutional_kernels.cu

@@ -17,7 +17,7 @@
{
    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= n) return;
    binary[i] = (x[i] > 0) ? 1 : -1;
    binary[i] = (x[i] >= 0) ? 1 : -1;
}

void binarize_gpu(float *x, int n, float *binary)
@@ -60,6 +60,7 @@
    mean = mean / size;
    for(i = 0; i < size; ++i){
        binary[f*size + i] = (filters[f*size + i] > 0) ? mean : -mean;
        //binary[f*size + i] = filters[f*size + i];
    }
}


 src/convolutional_layer.c

@@ -70,18 +70,12 @@

int convolutional_out_height(convolutional_layer l)
{
    int h = l.h;
    if (!l.pad) h -= l.size;
    else h -= 1;
    return h/l.stride + 1;
    return (l.h + 2*l.pad - l.size) / l.stride + 1;
}

int convolutional_out_width(convolutional_layer l)
{
    int w = l.w;
    if (!l.pad) w -= l.size;
    else w -= 1;
    return w/l.stride + 1;
    return (l.w + 2*l.pad - l.size) / l.stride + 1;
}

image get_convolutional_image(convolutional_layer l)
@@ -148,8 +142,7 @@
    cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w); 
    cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w); 
    cudnnSetFilter4dDescriptor(l->filterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size); 
    int padding = l->pad ? l->size/2 : 0;
    cudnnSetConvolution2dDescriptor(l->convDesc, padding, padding, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
    cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
    cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
            l->srcTensorDesc,
            l->filterDesc,
@@ -178,7 +171,7 @@
#endif
#endif

convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary, int xnor)
convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor)
{
    int i;
    convolutional_layer l = {0};
@@ -193,7 +186,7 @@
    l.batch = batch;
    l.stride = stride;
    l.size = size;
    l.pad = pad;
    l.pad = padding;
    l.batch_normalize = batch_normalize;

    l.filters = calloc(c*n*size*size, sizeof(float));

 src/cost_layer.c

@@ -98,6 +98,15 @@
    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
}

/*
 * qsort-style comparator that orders floats by ascending absolute value.
 * Yields 1, -1 or 0 when |*a| is greater than, less than, or neither
 * greater nor less than |*b|.
 */
int float_abs_compare (const void * a, const void * b)
{
    const float *lhs = a;
    const float *rhs = b;
    float mag_a = (*lhs < 0) ? -*lhs : *lhs;
    float mag_b = (*rhs < 0) ? -*rhs : *rhs;

    if(mag_a > mag_b) return 1;
    if(mag_a < mag_b) return -1;
    return 0;
}

void forward_cost_layer_gpu(cost_layer l, network_state state)
{
    if (!state.truth) return;
@@ -111,6 +120,16 @@
        l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
    }

    if(l.ratio){
        cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
        qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
        int n = (1-l.ratio) * l.batch*l.inputs;
        float thresh = l.delta[n];
        thresh = 0;
        printf("%f\n", thresh);
        supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
    }

    cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
    l.cost[0] = sum_array(l.output, l.batch*l.inputs);
}

 src/darknet.c

@@ -13,7 +13,6 @@
#endif

extern void run_voxel(int argc, char **argv);
extern void run_imagenet(int argc, char **argv);
extern void run_yolo(int argc, char **argv);
extern void run_detector(int argc, char **argv);
extern void run_coco(int argc, char **argv);
@@ -327,9 +326,7 @@
    }
#endif

    if(0==strcmp(argv[1], "imagenet")){
        run_imagenet(argc, argv);
    } else if (0 == strcmp(argv[1], "average")){
    if (0 == strcmp(argv[1], "average")){
        average(argc, argv);
    } else if (0 == strcmp(argv[1], "yolo")){
        run_yolo(argc, argv);

 src/data.c

@@ -100,7 +100,7 @@
    return X;
}

matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float exposure, float saturation)
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure)
{
    int i;
    matrix X;
@@ -113,10 +113,7 @@
        image crop = random_augment_image(im, angle, min, max, size);
        int flip = rand_r(&data_seed)%2;
        if (flip) flip_image(crop);
        float exp = rand_uniform(1./exposure, exposure);
        float sat = rand_uniform(1./saturation, saturation);
        exposure_image(crop, exp);
        exposure_image(crop, sat);
        random_distort_image(crop, hue, saturation, exposure);

        /*
        show_image(im, "orig");
@@ -241,6 +238,7 @@
    labelpath = find_replace(labelpath, "JPEGImages", "labels");

    labelpath = find_replace(labelpath, ".jpg", ".txt");
    labelpath = find_replace(labelpath, ".png", ".txt");
    labelpath = find_replace(labelpath, ".JPG", ".txt");
    labelpath = find_replace(labelpath, ".JPEG", ".txt");
    int count = 0;
@@ -287,6 +285,7 @@
    labelpath = find_replace(labelpath, "JPEGImages", "labels");

    labelpath = find_replace(labelpath, ".jpg", ".txt");
    labelpath = find_replace(labelpath, ".png", ".txt");
    labelpath = find_replace(labelpath, ".JPG", ".txt");
    labelpath = find_replace(labelpath, ".JPEG", ".txt");
    int count = 0;
@@ -443,7 +442,7 @@
    }
}

data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter)
data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure)
{
    char **random_paths = get_random_paths(paths, n, m);
    int i;
@@ -485,6 +484,7 @@

        image sized = resize_image(cropped, w, h);
        if(flip) flip_image(sized);
        random_distort_image(sized, hue, saturation, exposure);
        d.X.vals[i] = sized.data;

        fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy);
@@ -611,7 +611,7 @@
    return d;
}

data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter)
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure)
{
    char **random_paths = get_random_paths(paths, n, m);
    int i;
@@ -651,6 +651,7 @@

        image sized = resize_image(cropped, w, h);
        if(flip) flip_image(sized);
        random_distort_image(sized, hue, saturation, exposure);
        d.X.vals[i] = sized.data;

        fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);
@@ -679,17 +680,17 @@
    if (a.type == OLD_CLASSIFICATION_DATA){
        *a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
    } else if (a.type == CLASSIFICATION_DATA){
        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
        *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
    } else if (a.type == SUPER_DATA){
        *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
    } else if (a.type == STUDY_DATA){
        *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
        *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
    } else if (a.type == WRITING_DATA){
        *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
    } else if (a.type == REGION_DATA){
        *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
        *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
    } else if (a.type == DETECTION_DATA){
        *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
        *a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
    } else if (a.type == SWAG_DATA){
        *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
    } else if (a.type == COMPARE_DATA){
@@ -698,7 +699,7 @@
        *(a.im) = load_image_color(a.path, 0, 0);
        *(a.resized) = resize_image(*(a.im), a.w, a.h);
    } else if (a.type == TAG_DATA){
        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
        *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
        //*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
    }
    free(ptr);
@@ -740,13 +741,13 @@
    return d;
}

data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
{
    data d = {0};
    d.indexes = calloc(n, sizeof(int));
    if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
    d.shallow = 0;
    d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
    d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
    d.y = load_labels_paths(paths, n, labels, k);
    if(m) free(paths);
    return d;
@@ -782,25 +783,25 @@
    return d;
}

data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
{
    if(m) paths = get_random_paths(paths, n, m);
    data d = {0};
    d.shallow = 0;
    d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
    d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
    d.y = load_labels_paths(paths, n, labels, k);
    if(m) free(paths);
    return d;
}

data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation)
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
{
    if(m) paths = get_random_paths(paths, n, m);
    data d = {0};
    d.w = size;
    d.h = size;
    d.shallow = 0;
    d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
    d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
    d.y = load_tags_paths(paths, n, k);
    if(m) free(paths);
    return d;

 src/data.h

@@ -54,6 +54,7 @@
    float angle;
    float saturation;
    float exposure;
    float hue;
    data *d;
    image *im;
    image *resized;
@@ -74,11 +75,12 @@
data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter);
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure);
data load_data_super(char **paths, int n, int m, int w, int h, int scale);
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
data load_go(char *filename);

box_label *read_boxes(char *filename, int *n);

 src/demo.c

@@ -8,7 +8,7 @@
#include "demo.h"
#include <sys/time.h>

#define FRAMES 1
#define FRAMES 3

#ifdef OPENCV
#include "opencv2/highgui/highgui_c.h"
@@ -48,7 +48,7 @@

void *detect_in_thread(void *ptr)
{
    float nms = .4;
    float nms = .1;

    detection_layer l = net.layers[net.n-1];
    float *X = det_s.data;
@@ -153,13 +153,19 @@
            if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
            if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");

            show_image(disp, "Demo");
            int c = cvWaitKey(1);
            if (c == 10){
                if(frame_skip == 0) frame_skip = 60;
                else if(frame_skip == 4) frame_skip = 0;
                else if(frame_skip == 60) frame_skip = 4;   
                else frame_skip = 0;
            if(1){
                show_image(disp, "Demo");
                int c = cvWaitKey(1);
                if (c == 10){
                    if(frame_skip == 0) frame_skip = 60;
                    else if(frame_skip == 4) frame_skip = 0;
                    else if(frame_skip == 60) frame_skip = 4;   
                    else frame_skip = 0;
                }
            }else{
                char buff[256];
                sprintf(buff, "/home/pjreddie/tmp/bag_%07d", count);
                save_image(disp, buff);
            }

            pthread_join(fetch_thread, 0);

 src/detection_layer.c

@@ -22,6 +22,8 @@
    l.coords = coords;
    l.rescore = rescore;
    l.side = side;
    l.w = side;
    l.h = side;
    assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
    l.cost = calloc(1, sizeof(float));
    l.outputs = l.inputs;
@@ -44,6 +46,7 @@
    int locations = l.side*l.side;
    int i,j;
    memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
    //if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1);
    int b;
    if (l.softmax){
        for(b = 0; b < l.batch; ++b){
@@ -204,6 +207,7 @@


        printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
        //if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0);
    }
}


 src/detector.c

@@ -51,6 +51,11 @@
    args.d = &buffer;
    args.type = DETECTION_DATA;

    args.angle = net.angle;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;
    //while(i*imgs < N*120){

 src/im2col.c

@@ -18,13 +18,9 @@
     int ksize,  int stride, int pad, float* data_col) 
{
    int c,h,w;
    int height_col = (height - ksize) / stride + 1;
    int width_col = (width - ksize) / stride + 1;
    if (pad){
        height_col = 1 + (height-1) / stride;
        width_col = 1 + (width-1) / stride;
        pad = ksize/2;
    }
    int height_col = (height + 2*pad - ksize) / stride + 1;
    int width_col = (width + 2*pad - ksize) / stride + 1;

    int channels_col = channels * ksize * ksize;
    for (c = 0; c < channels_col; ++c) {
        int w_offset = c % ksize;

 src/im2col_kernels.cu

@@ -33,8 +33,12 @@
            for (int j = 0; j < ksize; ++j) {
                int h = h_in + i;
                int w = w_in + j;

                *data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ?
                    data_im_ptr[i * width + j] : 0;

                //*data_col_ptr = data_im_ptr[ii * width + jj];

                data_col_ptr += height_col * width_col;
            }
        }
@@ -46,7 +50,6 @@
         int ksize, int stride, int pad, float *data_col){
    // We are going to launch channels * height_col * width_col kernels, each
    // kernel responsible for copying a single-channel grid.
    pad = pad ? ksize/2 : 0;
    int height_col = (height + 2 * pad - ksize) / stride + 1;
    int width_col = (width + 2 * pad - ksize) / stride + 1;
    int num_kernels = channels * height_col * width_col;
@@ -56,93 +59,3 @@
                stride, height_col,
                width_col, data_col);
}
/*
   __global__ void im2col_pad_kernel(float *im,
   int channels,  int height,  int width,
   int ksize,  int stride, float *data_col)
   {
   int c,h,w;
   int height_col = 1 + (height-1) / stride;
   int width_col = 1 + (width-1) / stride;
   int channels_col = channels * ksize * ksize;

   int pad = ksize/2;

   int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
   int col_size = height_col*width_col*channels_col;
   if (id >= col_size) return;

   int col_index = id;
   w = id % width_col;
   id /= width_col;
   h = id % height_col;
   id /= height_col;
   c = id % channels_col;
   id /= channels_col;

   int w_offset = c % ksize;
   int h_offset = (c / ksize) % ksize;
   int im_channel = c / ksize / ksize;
   int im_row = h_offset + h * stride - pad;
   int im_col = w_offset + w * stride - pad;

   int im_index = im_col + width*(im_row + height*im_channel);
   float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];

   data_col[col_index] = val;
   }

   __global__ void im2col_nopad_kernel(float *im,
   int channels,  int height,  int width,
   int ksize,  int stride, float *data_col)
   {
   int c,h,w;
   int height_col = (height - ksize) / stride + 1;
   int width_col = (width - ksize) / stride + 1;
   int channels_col = channels * ksize * ksize;

   int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
   int col_size = height_col*width_col*channels_col;
   if (id >= col_size) return;

   int col_index = id;
   w = id % width_col;
   id /= width_col;
   h = id % height_col;
   id /= height_col;
   c = id % channels_col;
   id /= channels_col;

   int w_offset = c % ksize;
   int h_offset = (c / ksize) % ksize;
   int im_channel = c / ksize / ksize;
   int im_row = h_offset + h * stride;
   int im_col = w_offset + w * stride;

   int im_index = im_col + width*(im_row + height*im_channel);
   float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];

   data_col[col_index] = val;
   }

   extern "C" void im2col_ongpu(float *im,
   int channels,  int height,  int width,
int ksize,  int stride,  int pad, float *data_col)
{

    int height_col = (height - ksize) / stride + 1;
    int width_col = (width - ksize) / stride + 1;
    int channels_col = channels * ksize * ksize;

    if (pad){
        height_col = 1 + (height-1) / stride;
        width_col = 1 + (width-1) / stride;
    }

    size_t n = channels_col*height_col*width_col;

    if(pad)im2col_pad_kernel<<<cuda_gridsize(n),BLOCK>>>(im,  channels, height, width, ksize, stride, data_col);
    else im2col_nopad_kernel<<<cuda_gridsize(n),BLOCK>>>(im,  channels, height, width, ksize, stride, data_col);
    check_error(cudaPeekAtLastError());
}
*/

 src/image.c

@@ -1,6 +1,7 @@
#include "image.h"
#include "utils.h"
#include "blas.h"
#include "cuda.h"
#include <stdio.h>
#include <math.h>

@@ -248,6 +249,26 @@

// Rescale all values of p in place with (v - min)/(max - min), where min and
// max are taken over every channel together. When max - min is below 1e-9
// the pair is reset to (0, 1), which leaves the data unchanged.
void normalize_image(image p)
{
    int total = p.h*p.w*p.c;
    float lo = 9999999;
    float hi = -999999;
    int k;

    // Pass 1: find the extremes.
    for(k = 0; k < total; ++k){
        float cur = p.data[k];
        if(cur < lo) lo = cur;
        if(cur > hi) hi = cur;
    }

    // Degenerate range: fall back to the identity mapping.
    if(hi - lo < .000000001){
        lo = 0;
        hi = 1;
    }

    // Pass 2: apply the mapping.
    for(k = 0; k < total; ++k){
        p.data[k] = (p.data[k] - lo)/(hi-lo);
    }
}

void normalize_image2(image p)
{
    float *min = calloc(p.c, sizeof(float));
    float *max = calloc(p.c, sizeof(float));
    int i,j;
@@ -320,7 +341,6 @@
    }
    free_image(copy);
    if(0){
        //if(disp->height < 448 || disp->width < 448 || disp->height > 1000){
        int w = 448;
        int h = w*p.h/p.w;
        if(h > 1000){
@@ -334,202 +354,202 @@
    }
    cvShowImage(buff, disp);
    cvReleaseImage(&disp);
    }
}
#endif

    void show_image(image p, const char *name)
    {
void show_image(image p, const char *name)
{
#ifdef OPENCV
        show_image_cv(p, name);
    show_image_cv(p, name);
#else
        fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
        save_image(p, name);
    fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
    save_image(p, name);
#endif
    }
}

#ifdef OPENCV
    image get_image_from_stream(CvCapture *cap)
    {
        IplImage* src = cvQueryFrame(cap);
        if (!src) return make_empty_image(0,0,0);
        image im = ipl_to_image(src);
        rgbgr_image(im);
        return im;
    }
// Grab the next frame from cap and return it as an image after an
// rgbgr_image() channel swap. If cvQueryFrame yields no frame, an empty
// 0x0x0 image (data == 0) is returned instead.
image get_image_from_stream(CvCapture *cap)
{
    IplImage *frame = cvQueryFrame(cap);
    if (frame) {
        image out = ipl_to_image(frame);
        rgbgr_image(out);
        return out;
    }
    return make_empty_image(0,0,0);
}
#endif

#ifdef OPENCV
    void save_image_jpg(image p, const char *name)
    {
        image copy = copy_image(p);
        rgbgr_image(copy);
        int x,y,k;
void save_image_jpg(image p, const char *name)
{
    image copy = copy_image(p);
    if(p.c == 3) rgbgr_image(copy);
    int x,y,k;

        char buff[256];
        sprintf(buff, "%s.jpg", name);
    char buff[256];
    sprintf(buff, "%s.jpg", name);

        IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
        int step = disp->widthStep;
        for(y = 0; y < p.h; ++y){
            for(x = 0; x < p.w; ++x){
                for(k= 0; k < p.c; ++k){
                    disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
                }
    IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
    int step = disp->widthStep;
    for(y = 0; y < p.h; ++y){
        for(x = 0; x < p.w; ++x){
            for(k= 0; k < p.c; ++k){
                disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
            }
        }
        cvSaveImage(buff, disp,0);
        cvReleaseImage(&disp);
        free_image(copy);
    }
    cvSaveImage(buff, disp,0);
    cvReleaseImage(&disp);
    free_image(copy);
}
#endif

    void save_image(image im, const char *name)
    {
        #ifdef OPENCV
        save_image_jpg(im, name);
        #else
        char buff[256];
        //sprintf(buff, "%s (%d)", name, windows);
        sprintf(buff, "%s.png", name);
        unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
        int i,k;
        for(k = 0; k < im.c; ++k){
            for(i = 0; i < im.w*im.h; ++i){
                data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]);
// Write im to disk using `name` as the base file name.
// With OPENCV defined this defers to save_image_jpg. Otherwise the float
// data, indexed as data[pixel + channel*w*h], is converted to interleaved
// 8-bit samples (value*255) and written to "<name>.png" via stbi_write_png.
// Failures are reported on stderr; nothing is returned to the caller.
void save_image(image im, const char *name)
{
#ifdef OPENCV
    save_image_jpg(im, name);
#else
    char buff[256];
    // Bounded formatting: an over-long name is truncated instead of
    // overflowing buff.
    snprintf(buff, sizeof(buff), "%s.png", name);
    unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
    if(!data){
        fprintf(stderr, "Failed to allocate buffer for image %s\n", buff);
        return;
    }
    int i,k;
    for(k = 0; k < im.c; ++k){
        for(i = 0; i < im.w*im.h; ++i){
            // planar (channel-major) source -> interleaved destination
            data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]);
        }
    }
    int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
    free(data);
    if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
#endif
}


// Display every channel of p separately via show_image, titling each one
// "<name> - Layer <index>". The per-channel image obtained from
// get_image_layer is freed after it has been shown.
void show_image_layers(image p, char *name)
{
    int i;
    char buff[256];
    for(i = 0; i < p.c; ++i){
        // Bounded formatting: a long name is truncated rather than
        // written past the end of buff.
        snprintf(buff, sizeof(buff), "%s - Layer %d", name, i);
        image layer = get_image_layer(p, i);
        show_image(layer, buff);
        free_image(layer);
    }
}

// Show p as one image: collapse its layers with collapse_image_layers(p, 1),
// display the result under `name`, then free the temporary.
void show_image_collapsed(image p, char *name)
{
    image flat = collapse_image_layers(p, 1);

    show_image(flat, name);
    free_image(flat);
}

// Build an image header with the given width, height and channel count
// and a null data pointer. No pixel storage is allocated.
image make_empty_image(int w, int h, int c)
{
    image im;

    im.w = w;
    im.h = h;
    im.c = c;
    im.data = 0;

    return im;
}

// Create a w x h x c image whose data is h*w*c floats from calloc
// (so every value starts at zero). The calloc result is not checked.
image make_image(int w, int h, int c)
{
    image im = make_empty_image(w,h,c);

    im.data = calloc(h*w*c, sizeof(float));
    return im;
}

// Create a w x h x c image and fill every value with
// rand_normal()*.25 + .5.
image make_random_image(int w, int h, int c)
{
    // make_image performs the same make_empty_image + calloc sequence.
    image noise = make_image(w, h, c);
    int total = w*h*c;
    int k;

    for(k = 0; k < total; ++k){
        noise.data[k] = (rand_normal() * .25) + .5;
    }
    return noise;
}

// Wrap an existing float buffer in an image header of the given dimensions.
// The buffer is not copied; the returned image points at `data` itself.
image float_to_image(int w, int h, int c, float *data)
{
    image view = make_empty_image(w,h,c);

    view.data = data;
    return view;
}

image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
{
    int x, y, c;
    float cx = im.w/2.;
    float cy = im.h/2.;
    image rot = make_image(w, h, im.c);
    for(c = 0; c < im.c; ++c){
        for(y = 0; y < h; ++y){
            for(x = 0; x < w; ++x){
                float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx;
                float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy;
                float val = bilinear_interpolate(im, rx, ry, c);
                set_pixel(rot, x, y, c, val);
            }
        }
        int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
        free(data);
        if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
        #endif
    }
    return rot;
}


    void show_image_layers(image p, char *name)
    {
        int i;
        char buff[256];
        for(i = 0; i < p.c; ++i){
            sprintf(buff, "%s - Layer %d", name, i);
            image layer = get_image_layer(p, i);
            show_image(layer, buff);
            free_image(layer);
// Return a new image (allocated with make_image, same size as im) that is
// im rotated by `rad` radians about the point (im.w/2, im.h/2). Each output
// pixel is sampled from im with bilinear_interpolate at the rotated
// coordinate.
image rotate_image(image im, float rad)
{
    int x, y, c;
    float cx = im.w/2.;
    float cy = im.h/2.;
    // cos/sin depend only on rad, so evaluate them once rather than four
    // times per pixel. They stay double so the arithmetic below is the same
    // as when the calls were written inline.
    double cs = cos(rad);
    double sn = sin(rad);
    image rot = make_image(im.w, im.h, im.c);
    for(c = 0; c < im.c; ++c){
        for(y = 0; y < im.h; ++y){
            for(x = 0; x < im.w; ++x){
                float rx = cs*(x-cx) - sn*(y-cy) + cx;
                float ry = sn*(x-cx) + cs*(y-cy) + cy;
                float val = bilinear_interpolate(im, rx, ry, c);
                set_pixel(rot, x, y, c, val);
            }
        }
    }
    return rot;
}

    void show_image_collapsed(image p, char *name)
    {
        image c = collapse_image_layers(p, 1);
        show_image(c, name);
        free_image(c);
    }
// Add s to every value of m (all channels), in place.
void translate_image(image m, float s)
{
    int total = m.h*m.w*m.c;
    int k;

    for(k = 0; k < total; ++k){
        m.data[k] += s;
    }
}

    image make_empty_image(int w, int h, int c)
    {
        image out;
        out.data = 0;
        out.h = h;
        out.w = w;
        out.c = c;
        return out;
    }
// Multiply every value of m (all channels) by s, in place.
void scale_image(image m, float s)
{
    int total = m.h*m.w*m.c;
    int k;

    for(k = 0; k < total; ++k){
        m.data[k] *= s;
    }
}

    image make_image(int w, int h, int c)
    {
        image out = make_empty_image(w,h,c);
        out.data = calloc(h*w*c, sizeof(float));
        return out;
    }

    image make_random_image(int w, int h, int c)
    {
        image out = make_empty_image(w,h,c);
        out.data = calloc(h*w*c, sizeof(float));
        int i;
        for(i = 0; i < w*h*c; ++i){
            out.data[i] = (rand_normal() * .25) + .5;
        }
        return out;
    }

    image float_to_image(int w, int h, int c, float *data)
    {
        image out = make_empty_image(w,h,c);
        out.data = data;
        return out;
    }

    image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
    {
        int x, y, c;
        float cx = im.w/2.;
        float cy = im.h/2.;
        image rot = make_image(w, h, im.c);
        for(c = 0; c < im.c; ++c){
            for(y = 0; y < h; ++y){
                for(x = 0; x < w; ++x){
                    float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx;
                    float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy;
                    float val = bilinear_interpolate(im, rx, ry, c);
                    set_pixel(rot, x, y, c, val);
image crop_image(image im, int dx, int dy, int w, int h)
{
    image cropped = make_image(w, h, im.c);
    int i, j, k;
    for(k = 0; k < im.c; ++k){
        for(j = 0; j < h; ++j){
            for(i = 0; i < w; ++i){
                int r = j + dy;
                int c = i + dx;
                float val = 0;
                r = constrain_int(r, 0, im.h-1);
                c = constrain_int(c, 0, im.w-1);
                if (r >= 0 && r < im.h && c >= 0 && c < im.w) {
                    val = get_pixel(im, c, r, k);
                }
                set_pixel(cropped, i, j, k, val);
            }
        }
        return rot;
    }

    image rotate_image(image im, float rad)
    {
        int x, y, c;
        float cx = im.w/2.;
        float cy = im.h/2.;
        image rot = make_image(im.w, im.h, im.c);
        for(c = 0; c < im.c; ++c){
            for(y = 0; y < im.h; ++y){
                for(x = 0; x < im.w; ++x){
                    float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx;
                    float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy;
                    float val = bilinear_interpolate(im, rx, ry, c);
                    set_pixel(rot, x, y, c, val);
                }
            }
        }
        return rot;
    }

    void translate_image(image m, float s)
    {
        int i;
        for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s;
    }

    void scale_image(image m, float s)
    {
        int i;
        for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s;
    }

    image crop_image(image im, int dx, int dy, int w, int h)
    {
        image cropped = make_image(w, h, im.c);
        int i, j, k;
        for(k = 0; k < im.c; ++k){
            for(j = 0; j < h; ++j){
                for(i = 0; i < w; ++i){
                    int r = j + dy;
                    int c = i + dx;
                    float val = 0;
                    r = constrain_int(r, 0, im.h-1);
                    c = constrain_int(c, 0, im.w-1);
                    if (r >= 0 && r < im.h && c >= 0 && c < im.w) {
                        val = get_pixel(im, c, r, k);
                    }
                    set_pixel(cropped, i, j, k, val);
                }
            }
        }
        return cropped;
    }
    return cropped;
}

int best_3d_shift_r(image a, image b, int min, int max)
{
@@ -666,7 +686,7 @@
            v = max;
            if(max == 0){
                s = 0;
                h = -1;
                h = 0;
            }else{
                s = delta/max;
                if(r == max){
@@ -677,6 +697,7 @@
                    h = 4 + (r - g) / delta;
                }
                if (h < 0) h += 6;
                h = h/6.;
            }
            set_pixel(im, i, j, 0, h);
            set_pixel(im, i, j, 1, s);
@@ -694,7 +715,7 @@
    float f, p, q, t;
    for(j = 0; j < im.h; ++j){
        for(i = 0; i < im.w; ++i){
            h = get_pixel(im, i , j, 0);
            h = 6 * get_pixel(im, i , j, 0);
            s = get_pixel(im, i , j, 1);
            v = get_pixel(im, i , j, 2);
            if (s == 0) {
@@ -781,6 +802,18 @@
    }
}

// Add v to every pixel of channel c of im, in place, going through
// get_pixel/set_pixel for each (x, y).
void translate_image_channel(image im, int c, float v)
{
    int x, y;

    for(y = 0; y < im.h; ++y){
        for(x = 0; x < im.w; ++x){
            float shifted = get_pixel(im, x, y, c) + v;
            set_pixel(im, x, y, c, shifted);
        }
    }
}

image binarize_image(image im)
{
    image c = copy_image(im);
@@ -800,6 +833,19 @@
    constrain_image(im);
}

// Shift the hue of im in place. The image is converted with rgb_to_hsv,
// `hue` is added to the first im.w*im.h entries of im.data (the first
// channel), each result is wrapped once back towards [0,1], and the image
// is converted back with hsv_to_rgb and passed through constrain_image.
void hue_image(image im, float hue)
{
    int plane = im.w*im.h;
    int k;

    rgb_to_hsv(im);
    for(k = 0; k < plane; ++k){
        float shifted = im.data[k] + hue;
        // Single wrap-around step in each direction, as stored values.
        if (shifted > 1) shifted -= 1;
        if (shifted < 0) shifted += 1;
        im.data[k] = shifted;
    }
    hsv_to_rgb(im);
    constrain_image(im);
}

void exposure_image(image im, float sat)
{
    rgb_to_hsv(im);
@@ -808,6 +854,29 @@
    constrain_image(im);
}

// Apply a combined colour distortion to im in place, inside one
// rgb_to_hsv / hsv_to_rgb round trip:
//   - channel 1 is scaled by `sat` and channel 2 by `val`
//     (via scale_image_channel),
//   - `hue` is added to the first im.w*im.h entries of im.data (the first
//     channel), with a single wrap step to bring values back towards [0,1].
// constrain_image is applied to the final result.
void distort_image(image im, float hue, float sat, float val)
{
    int plane = im.w*im.h;
    int k;

    rgb_to_hsv(im);

    scale_image_channel(im, 1, sat);
    scale_image_channel(im, 2, val);

    for(k = 0; k < plane; ++k){
        float shifted = im.data[k] + hue;
        if (shifted > 1) shifted -= 1;
        if (shifted < 0) shifted += 1;
        im.data[k] = shifted;
    }

    hsv_to_rgb(im);
    constrain_image(im);
}

// Distort im in place with randomly drawn parameters:
//   hue shift   = rand_uniform(-hue, hue)
//   sat scale   = rand_scale(saturation)
//   value scale = rand_scale(exposure)
// The three draws are made in that order before calling distort_image.
void random_distort_image(image im, float hue, float saturation, float exposure)
{
    float hue_shift = rand_uniform(-hue, hue);
    float sat_scale = rand_scale(saturation);
    float val_scale = rand_scale(exposure);

    distort_image(im, hue_shift, sat_scale, val_scale);
}

void saturate_exposure_image(image im, float sat, float exposure)
{
    rgb_to_hsv(im);
@@ -876,7 +945,6 @@
    return resized;
}

#include "cuda.h"

void test_resize(char *filename)
{
@@ -885,59 +953,40 @@
    printf("L2 Norm: %f\n", mag);
    image gray = grayscale_image(im);

    image sat2 = copy_image(im);
    saturate_image(sat2, 2);
    image c1 = copy_image(im);
    image c2 = copy_image(im);
    image c3 = copy_image(im);
    image c4 = copy_image(im);
    distort_image(c1, .1, 1.5, 1.5);
    distort_image(c2, -.1, .66666, .66666);
    distort_image(c3, .1, 1.5, .66666);
    distort_image(c4, .1, .66666, 1.5);

    image sat5 = copy_image(im);
    saturate_image(sat5, .5);

    image exp2 = copy_image(im);
    exposure_image(exp2, 2);

    image exp5 = copy_image(im);
    exposure_image(exp5, .5);

    image bin = binarize_image(im);

/*
#ifdef GPU
    image r = resize_image(im, im.w, im.h);
    image black = make_image(im.w*2 + 3, im.h*2 + 3, 9);
    image black2 = make_image(im.w, im.h, 3);

    float *r_gpu = cuda_make_array(r.data, r.w*r.h*r.c);
    float *black_gpu = cuda_make_array(black.data, black.w*black.h*black.c);
    float *black2_gpu = cuda_make_array(black2.data, black2.w*black2.h*black2.c);
    shortcut_gpu(3, r.w, r.h, 1, r_gpu, black.w, black.h, 3, black_gpu);
    //flip_image(r);
    //shortcut_gpu(3, r.w, r.h, 1, r.data, black.w, black.h, 3, black.data);

    shortcut_gpu(3, black.w, black.h, 3, black_gpu, black2.w, black2.h, 1, black2_gpu);
    cuda_pull_array(black_gpu, black.data, black.w*black.h*black.c);
    cuda_pull_array(black2_gpu, black2.data, black2.w*black2.h*black2.c);
    show_image_layers(black, "Black");
    show_image(black2, "Recreate");
#endif
*/
    image rot = rotate_crop_image(im, -.2618, 1, im.w/2, im.h/2, 0, 0);
    image rot3 = rotate_crop_image(im, -.2618, 2, im.w, im.h, im.w/2, 0);
    image rot2 = rotate_crop_image(im, -.2618, 1, im.w, im.h, 0, 0);
    show_image(rot, "Rotated");
    show_image(rot2, "base");

    show_image(rot3, "Rotated2");

/*
    show_image(im,   "Original");
    show_image(bin,  "Binary");
    show_image(gray, "Gray");
    show_image(sat2, "Saturation-2");
    show_image(sat5, "Saturation-.5");
    show_image(exp2, "Exposure-2");
    show_image(exp5, "Exposure-.5");
    */
    show_image(c1, "C1");
    show_image(c2, "C2");
    show_image(c3, "C3");
    show_image(c4, "C4");
#ifdef OPENCV
    cvWaitKey(0);
    while(1){
        float exposure = 1.15;
        float saturation = 1.15;
        float hue = .05;

        image c = copy_image(im);

        float dexp = rand_scale(exposure);
        float dsat = rand_scale(saturation);
        float dhue = rand_uniform(-hue, hue);

        distort_image(c, dhue, dsat, dexp);
        show_image(c, "rand");
        printf("%f %f %f\n", dhue, dsat, dexp);
        free_image(c);
        cvWaitKey(0);
    }
#endif
}

@@ -1180,10 +1229,8 @@
       image sized = resize_image(m, w, h);
     */
    normalize_image(m);
    image sized = resize_image(m, m.w, m.h);
    save_image(sized, window);
    show_image(sized, window);
    free_image(sized);
    save_image(m, window);
    show_image(m, window);
    free_image(m);
}


 src/image.h

@@ -32,6 +32,7 @@
image crop_image(image im, int dx, int dy, int w, int h);
image random_crop_image(image im, int w, int h);
image random_augment_image(image im, float angle, int low, int high, int size);
void random_distort_image(image im, float hue, float saturation, float exposure);
image resize_image(image im, int w, int h);
image resize_min(image im, int min);
void translate_image(image m, float s);
@@ -41,6 +42,7 @@
void embed_image(image source, image dest, int dx, int dy);
void saturate_image(image im, float sat);
void exposure_image(image im, float sat);
void distort_image(image im, float hue, float sat, float val);
void saturate_exposure_image(image im, float sat, float exposure);
void hsv_to_rgb(image im);
void rgbgr_image(image im);

 src/imagenet.c

File was deleted

 src/layer.h

@@ -72,6 +72,7 @@
    float saturation;
    float exposure;
    float shift;
    float ratio;
    int softmax;
    int classes;
    int coords;
@@ -82,6 +83,7 @@
    int joint;
    int noadjust;
    int reorg;
    int log;

    float alpha;
    float beta;

 src/maxpool_layer.c

@@ -18,7 +18,7 @@
    return float_to_image(w,h,c,l.delta);
}

maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride)
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
{
    fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d size, %d stride\n", h,w,c,size,stride);
    maxpool_layer l = {0};
@@ -27,8 +27,9 @@
    l.h = h;
    l.w = w;
    l.c = c;
    l.out_w = (w-1)/stride + 1;
    l.out_h = (h-1)/stride + 1;
    l.pad = padding;
    l.out_w = (w + 2*padding - size + 1)/stride + 1;
    l.out_h = (h + 2*padding - size + 1)/stride + 1;
    l.out_c = c;
    l.outputs = l.out_h * l.out_w * l.out_c;
    l.inputs = h*w*c;
@@ -48,13 +49,12 @@

void resize_maxpool_layer(maxpool_layer *l, int w, int h)
{
    int stride = l->stride;
    l->h = h;
    l->w = w;
    l->inputs = h*w*l->c;

    l->out_w = (w-1)/stride + 1;
    l->out_h = (h-1)/stride + 1;
    l->out_w = (w + 2*l->pad - l->size + 1)/l->stride + 1;
    l->out_h = (h + 2*l->pad - l->size + 1)/l->stride + 1;
    l->outputs = l->out_w * l->out_h * l->c;
    int output_size = l->outputs * l->batch;

@@ -75,11 +75,11 @@
void forward_maxpool_layer(const maxpool_layer l, network_state state)
{
    int b,i,j,k,m,n;
    int w_offset = (-l.size-1)/2 + 1;
    int h_offset = (-l.size-1)/2 + 1;
    int w_offset = -l.pad;
    int h_offset = -l.pad;

    int h = (l.h-1)/l.stride + 1;
    int w = (l.w-1)/l.stride + 1;
    int h = l.out_h;
    int w = l.out_w;
    int c = l.c;

    for(b = 0; b < l.batch; ++b){
@@ -112,8 +112,8 @@
void backward_maxpool_layer(const maxpool_layer l, network_state state)
{
    int i;
    int h = (l.h-1)/l.stride + 1;
    int w = (l.w-1)/l.stride + 1;
    int h = l.out_h;
    int w = l.out_w;
    int c = l.c;
    for(i = 0; i < h*w*c*l.batch; ++i){
        int index = l.indexes[i];

 src/maxpool_layer.h

@@ -9,7 +9,7 @@
typedef layer maxpool_layer;

image get_maxpool_image(maxpool_layer l);
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride);
maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
void resize_maxpool_layer(maxpool_layer *l, int w, int h);
void forward_maxpool_layer(const maxpool_layer l, network_state state);
void backward_maxpool_layer(const maxpool_layer l, network_state state);

 src/maxpool_layer_kernels.cu

@@ -7,10 +7,10 @@
#include "cuda.h"
}

__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float *input, float *output, int *indexes)
__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *input, float *output, int *indexes)
{
    int h = (in_h-1)/stride + 1;
    int w = (in_w-1)/stride + 1;
    int h = (in_h + 2*pad - size + 1)/stride + 1;
    int w = (in_w + 2*pad - size + 1)/stride + 1;
    int c = in_c;

    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@@ -24,8 +24,8 @@
    id /= c;
    int b = id;

    int w_offset = (-size-1)/2 + 1;
    int h_offset = (-size-1)/2 + 1;
    int w_offset = -pad;
    int h_offset = -pad;

    int out_index = j + w*(i + h*(k + c*b));
    float max = -INFINITY;
@@ -47,10 +47,10 @@
    indexes[out_index] = max_i;
}

__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float *delta, float *prev_delta, int *indexes)
__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float *delta, float *prev_delta, int *indexes)
{
    int h = (in_h-1)/stride + 1;
    int w = (in_w-1)/stride + 1;
    int h = (in_h + 2*pad - size + 1)/stride + 1;
    int w = (in_w + 2*pad - size + 1)/stride + 1;
    int c = in_c;
    int area = (size-1)/stride;

@@ -66,8 +66,8 @@
    id /= in_c;
    int b = id;

    int w_offset = (-size-1)/2 + 1;
    int h_offset = (-size-1)/2 + 1;
    int w_offset = -pad;
    int h_offset = -pad;

    float d = 0;
    int l, m;
@@ -86,13 +86,13 @@

extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
{
    int h = (layer.h-1)/layer.stride + 1;
    int w = (layer.w-1)/layer.stride + 1;
    int h = layer.out_h;
    int w = layer.out_w;
    int c = layer.c;

    size_t n = h*w*c*layer.batch;

    forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, state.input, layer.output_gpu, layer.indexes_gpu);
    forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
    check_error(cudaPeekAtLastError());
}

@@ -100,7 +100,7 @@
{
    size_t n = layer.h*layer.w*layer.c*layer.batch;

    backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, state.delta, layer.indexes_gpu);
    backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
    check_error(cudaPeekAtLastError());
}


 src/network.c

@@ -420,7 +420,7 @@
    net->h = h;
    int inputs = 0;
    size_t workspace_size = 0;
    //fprintf(stderr, "Resizing to %d x %d...", w, h);
    //fprintf(stderr, "Resizing to %d x %d...\n", w, h);
    //fflush(stderr);
    for (i = 0; i < net->n; ++i){
        layer l = net->layers[i];

 src/network.h

@@ -43,6 +43,7 @@
    float angle;
    float exposure;
    float saturation;
    float hue;

    int gpu_index;


 src/parser.c

@@ -2,6 +2,7 @@
#include <string.h>
#include <stdlib.h>

#include "blas.h"
#include "parser.h"
#include "assert.h"
#include "activations.h"
@@ -147,7 +148,10 @@
    int n = option_find_int(options, "filters",1);
    int size = option_find_int(options, "size",1);
    int stride = option_find_int(options, "stride",1);
    int pad = option_find_int(options, "pad",0);
    int pad = option_find_int_quiet(options, "pad",0);
    int padding = option_find_int_quiet(options, "padding",0);
    if(pad) padding = size/2;

    char *activation_s = option_find_str(options, "activation", "logistic");
    ACTIVATION activation = get_activation(activation_s);

@@ -161,7 +165,7 @@
    int binary = option_find_int_quiet(options, "binary", 0);
    int xnor = option_find_int_quiet(options, "xnor", 0);

    convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation, batch_normalize, binary, xnor);
    convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,padding,activation, batch_normalize, binary, xnor);
    layer.flipped = option_find_int_quiet(options, "flipped", 0);
    layer.dot = option_find_float_quiet(options, "dot", 0);

@@ -234,9 +238,16 @@
    int coords = option_find_int(options, "coords", 4);
    int classes = option_find_int(options, "classes", 20);
    int num = option_find_int(options, "num", 1);

    params.w = option_find_int(options, "side", params.w);
    params.h = option_find_int(options, "side", params.h);

    layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords);
    assert(l.outputs == params.inputs);

    l.log = option_find_int_quiet(options, "log", 0);
    l.sqrt = option_find_int_quiet(options, "sqrt", 0);

    l.softmax = option_find_int(options, "softmax", 0);
    l.max_boxes = option_find_int_quiet(options, "max",30);
    l.jitter = option_find_float(options, "jitter", .2);
@@ -278,6 +289,7 @@
    COST_TYPE type = get_cost_type(type_s);
    float scale = option_find_float_quiet(options, "scale",1);
    cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
    layer.ratio =  option_find_float_quiet(options, "ratio",0);
    return layer;
}

@@ -324,6 +336,7 @@
{
    int stride = option_find_int(options, "stride",1);
    int size = option_find_int(options, "size",stride);
    int padding = option_find_int_quiet(options, "padding", (size-1)/2);

    int batch,h,w,c;
    h = params.h;
@@ -332,7 +345,7 @@
    batch=params.batch;
    if(!(h && w && c)) error("Layer before maxpool layer must output image.");

    maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride);
    maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding);
    return layer;
}

@@ -486,6 +499,7 @@
    net->angle = option_find_float_quiet(options, "angle", 0);
    net->saturation = option_find_float_quiet(options, "saturation", 1);
    net->exposure = option_find_float_quiet(options, "exposure", 1);
    net->hue = option_find_float_quiet(options, "hue", 0);

    if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");

@@ -1085,6 +1099,7 @@
        fread(l.rolling_variance, sizeof(float), l.n, fp);
    }
    fread(l.filters, sizeof(float), num, fp);
    //if(l.c == 3) scal_cpu(num, 1./256, l.filters, 1);
    if (l.flipped) {
        transpose_matrix(l.filters, l.c*l.size*l.size, l.n);
    }

 src/region_layer.c

@@ -80,8 +80,8 @@
        b.w = logistic_activate(x[index + 2]);
        b.h = logistic_activate(x[index + 3]);
    }
    if(adjust && b.w < .01) b.w = .01;
    if(adjust && b.h < .01) b.h = .01;
    //if(adjust && b.w < .01) b.w = .01;
    //if(adjust && b.h < .01) b.h = .01;
    return b;
}

@@ -149,6 +149,7 @@
                    l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4]));
                    if(best_iou > .5) l.delta[index + 4] = 0;

                    /*
                    if(*(state.net.seen) < 6400){
                        box truth = {0};
                        truth.x = (i + .5)/l.w;
@@ -157,6 +158,7 @@
                        truth.h = .5;
                        delta_region_box(truth, l.output, index, i, j, l.w, l.h, l.delta, LOG, 1);
                    }
                    */
                }
            }
        }

 src/tag.c

@@ -45,6 +45,11 @@
    args.d = &buffer;
    args.type = TAG_DATA;

    args.angle = net.angle;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;

    fprintf(stderr, "%d classes\n", net.outputs);

    load_thread = load_data_in_thread(args);

 src/utils.c

@@ -585,6 +585,13 @@
    return ((float)rand()/RAND_MAX * (max - min)) + min;
}

/*
 * Return a random scale factor: a draw from rand_uniform(1, s), or the
 * reciprocal of that draw, selected by the parity of a subsequent rand().
 */
float rand_scale(float s)
{
    const float factor = rand_uniform(1, s);
    const int keep = rand() % 2;   /* drawn after the uniform sample, as before */
    return keep ? factor : 1./factor;
}

float **one_hot_encode(float *a, int n, int k)
{
    int i;

 src/utils.h

@@ -42,6 +42,7 @@
float rand_normal();
size_t rand_size_t();
float rand_uniform(float min, float max);
float rand_scale(float s);
int rand_int(int min, int max);
float sum_array(float *a, int n);
float mean_array(float *a, int n);

 src/voxel.c

@@ -9,37 +9,37 @@

void extract_voxel(char *lfile, char *rfile, char *prefix)
{
#ifdef OPENCV
    int w = 1920;
    int h = 1080;
#ifdef OPENCV
        int shift = 0;
        int count = 0;
        CvCapture *lcap = cvCaptureFromFile(lfile);
        CvCapture *rcap = cvCaptureFromFile(rfile);
        while(1){
            image l = get_image_from_stream(lcap);
            image r = get_image_from_stream(rcap);
            if(!l.w || !r.w) break;
            if(count%100 == 0) {
                shift = best_3d_shift_r(l, r, -l.h/100, l.h/100);
                printf("%d\n", shift);
            }
            image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h);
            image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h);
            char buff[256];
            sprintf(buff, "%s_%05d_l", prefix, count);
            save_image(ls, buff);
            sprintf(buff, "%s_%05d_r", prefix, count);
            save_image(rs, buff);
            free_image(l);
            free_image(r);
            free_image(ls);
            free_image(rs);
            ++count;
    int shift = 0;
    int count = 0;
    CvCapture *lcap = cvCaptureFromFile(lfile);
    CvCapture *rcap = cvCaptureFromFile(rfile);
    while(1){
        image l = get_image_from_stream(lcap);
        image r = get_image_from_stream(rcap);
        if(!l.w || !r.w) break;
        if(count%100 == 0) {
            shift = best_3d_shift_r(l, r, -l.h/100, l.h/100);
            printf("%d\n", shift);
        }
        image ls = crop_image(l, (l.w - w)/2, (l.h - h)/2, w, h);
        image rs = crop_image(r, 105 + (r.w - w)/2, (r.h - h)/2 + shift, w, h);
        char buff[256];
        sprintf(buff, "%s_%05d_l", prefix, count);
        save_image(ls, buff);
        sprintf(buff, "%s_%05d_r", prefix, count);
        save_image(rs, buff);
        free_image(l);
        free_image(r);
        free_image(ls);
        free_image(rs);
        ++count;
    }

#else
printf("need OpenCV for extraction\n");
    printf("need OpenCV for extraction\n");
#endif
}

@@ -164,6 +164,6 @@
    else if(0==strcmp(argv[2], "test")) test_voxel(cfg, weights, filename);
    else if(0==strcmp(argv[2], "extract")) extract_voxel(argv[3], argv[4], argv[5]);
    /*
    else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights);
    */
       else if(0==strcmp(argv[2], "valid")) validate_voxel(cfg, weights);
     */
}

 src/yolo.c

@@ -54,6 +54,11 @@
    args.d = &buffer;
    args.type = REGION_DATA;

    args.angle = net.angle;
    args.exposure = net.exposure;
    args.saturation = net.saturation;
    args.hue = net.hue;

    pthread_t load_thread = load_data_in_thread(args);
    clock_t time;
    //while(i*imgs < N*120){

			@@ -14,6 +14,7 @@
			submission/
			cfg/
			darknet
			.fuse*

			# OS Generated #
			.DS_Store*

			@@ -1,6 +1,6 @@
			GPU=1
			CUDNN=1
			OPENCV=1
			GPU=0
			CUDNN=0
			OPENCV=0
			DEBUG=0

			ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
			@@ -41,7 +41,7 @@
			LDFLAGS+= -lcudnn
			endif

			OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
			OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o super.o voxel.o
			ifeq ($(GPU), 1)
			LDFLAGS+= -lstdc++
			OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o

New file
			@@ -0,0 +1,209 @@
			[net]
			batch=128
			subdivisions=1
			height=224
			width=224
			max_crop=320
			channels=3
			momentum=0.9
			decay=0.0005

			learning_rate=0.01
			max_batches = 0
			policy=steps
			steps=444000,590000,970000
			scales=.5,.2,.1

			#policy=sigmoid
			#gamma=.00008
			#step=100000
			#max_batches=200000

			[convolutional]
			batch_normalize=1
			filters=64
			size=7
			stride=2
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=192
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=2048
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=2048
			size=3
			stride=1
			pad=1
			activation=leaky

			[avgpool]

			[connected]
			output=21842
			activation=leaky

			[softmax]
			groups=1

			[cost]
			type=sse

			@@ -3,102 +3,126 @@
			subdivisions=1
			height=19
			width=19
			channels=8
			channels=1
			momentum=0.9
			decay=0.0005

			learning_rate=0.1
			max_batches = 0
			policy=steps
			steps=50000
			scales=.1
			policy=poly
			power=4
			max_batches=400000

			[convolutional]
			filters=512
			filters=192
			size=3
			stride=1
			pad=1
			activation=leaky
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky
			batch_normalize=1

			[convolutional]
			filters=512
			filters=192
			size=3
			stride=1
			pad=1
			activation=leaky
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky
			batch_normalize=1

			[convolutional]
			filters=512
			filters=192
			size=3
			stride=1
			pad=1
			activation=leaky
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky
			batch_normalize=1

			[convolutional]
			filters=512
			filters=192
			size=3
			stride=1
			pad=1
			activation=leaky
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky
			batch_normalize=1

			[convolutional]
			filters=512
			filters=192
			size=3
			stride=1
			pad=1
			activation=leaky
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=256
			size=1
			filters=192
			size=3
			stride=1
			pad=1
			activation=leaky
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=192
			size=3
			stride=1
			pad=1
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=192
			size=3
			stride=1
			pad=1
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=192
			size=3
			stride=1
			pad=1
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=192
			size=3
			stride=1
			pad=1
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=192
			size=3
			stride=1
			pad=1
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=192
			size=3
			stride=1
			pad=1
			activation=relu
			batch_normalize=1

			[convolutional]
			filters=192
			size=3
			stride=1
			pad=1
			activation=relu
			batch_normalize=1


			[convolutional]
			filters=1
			size=1
			stride=1
			pad=1
			activation=leaky
			activation=linear

			[softmax]

			@@ -1,9 +1,8 @@
			classes=1000
			labels = data/inet.labels.list
			names = data/shortnames.txt
			train = /data/imagenet/imagenet1k.train.list
			valid = /data/imagenet/imagenet1k.valid.list
			top=5
			test = /Users/pjreddie/Documents/sites/selfie/paths.list
			train = /data/imagenet/imagenet1k.train.list
			valid = /data/imagenet/imagenet1k.valid.list
			backup = /home/pjreddie/backup/
			labels = data/imagenet.labels.list
			names = data/imagenet.shortnames.list
			top=5

			@@ -1,11 +1,14 @@
			[net]
			batch=64
			subdivisions=2
			batch=1
			subdivisions=1
			height=448
			width=448
			channels=3
			momentum=0.9
			decay=0.0005
			saturation=1.5
			exposure=1.5
			hue=.1

			learning_rate=0.0005
			policy=steps
			@@ -13,15 +16,6 @@
			scales=2.5,2,2,.1,.1
			max_batches = 40000

			[crop]
			crop_width=448
			crop_height=448
			flip=0
			angle=0
			saturation = 1.5
			exposure = 1.5
			noadjust=1

			[convolutional]
			batch_normalize=1
			filters=64

New file
			@@ -0,0 +1,257 @@
			[net]
			batch=64
			subdivisions=4
			height=448
			width=448
			channels=3
			momentum=0.9
			decay=0.0005
			saturation=1.5
			exposure=1.5
			hue=.1

			learning_rate=0.0005
			policy=steps
			steps=200,400,600,20000,30000
			scales=2.5,2,2,.1,.1
			max_batches = 40000

			[convolutional]
			batch_normalize=1
			filters=64
			size=7
			stride=2
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=192
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=3
			stride=1
			pad=1
			activation=leaky

			[maxpool]
			size=2
			stride=2

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=3
			stride=1
			pad=1
			activation=leaky

			#######

			[convolutional]
			batch_normalize=1
			size=3
			stride=1
			pad=1
			filters=1024
			activation=leaky

			[convolutional]
			batch_normalize=1
			size=3
			stride=2
			pad=1
			filters=1024
			activation=leaky

			[convolutional]
			batch_normalize=1
			size=3
			stride=1
			pad=1
			filters=1024
			activation=leaky

			[convolutional]
			batch_normalize=1
			size=3
			stride=1
			pad=1
			filters=1024
			activation=leaky

			[local]
			size=3
			stride=1
			pad=1
			filters=256
			activation=leaky

			[dropout]
			probability=.5

			[connected]
			output= 1715
			activation=linear

			[detection]
			classes=20
			coords=4
			rescore=1
			side=7
			num=3
			softmax=0
			sqrt=1
			jitter=.2

			object_scale=1
			noobject_scale=.5
			class_scale=1
			coord_scale=5

			@@ -40,6 +40,7 @@
			void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY);
			void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY);
			void scal_ongpu(int N, float ALPHA, float * X, int INCX);
			void supp_ongpu(int N, float ALPHA, float * X, int INCX);
			void mask_ongpu(int N, float * X, float mask_num, float * mask);
			void const_ongpu(int N, float ALPHA, float *X, int INCX);
			/* X and Y are arrays (pointer parameters), matching the other *_ongpu prototypes. */
			void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY);

			@@ -368,6 +368,14 @@
			if(i < N) X[iINCX] = min(ALPHA, max(-ALPHA, X[iINCX]));
			}

			/*
			 * Suppress small values: for each index i < N, set X[i*INCX] to 0 when
			 * its square is smaller than ALPHA squared. Other elements are left as-is.
			 *
			 * N     - number of strided elements to visit
			 * ALPHA - threshold (compared squared, so its sign does not matter)
			 * X     - array updated in place
			 * INCX  - stride between consecutive elements
			 */
			__global__ void supp_kernel(int N, float ALPHA, float *X, int INCX)
			{
			    /* One thread per element; blocks are laid out over a 2-D grid. */
			    int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
			    if(i < N) {
			        if((X[i*INCX] * X[i*INCX]) < (ALPHA * ALPHA)) X[i*INCX] = 0;
			    }
			}

			__global__ void scal_kernel(int N, float ALPHA, float *X, int INCX)
			{
			int i = (blockIdx.x + blockIdx.ygridDim.x) blockDim.x + threadIdx.x;
			@@ -552,6 +560,12 @@
			check_error(cudaPeekAtLastError());
			}

			/*
			 * Host-side entry point for supp_kernel: launches it with a grid from
			 * cuda_gridsize(N) and BLOCK threads per block, forwarding N, ALPHA, X
			 * and INCX unchanged, then passes cudaPeekAtLastError() to check_error.
			 * NOTE(review): X is presumably a device pointer - confirm against callers.
			 */
			extern "C" void supp_ongpu(int N, float ALPHA, float * X, int INCX)
			{
			supp_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
			check_error(cudaPeekAtLastError());
			}

			extern "C" void fill_ongpu(int N, float ALPHA, float * X, int INCX)
			{
			fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
			@@ -633,6 +647,7 @@
			}



			__global__ void weighted_sum_kernel(int n, float a, float b, float s, float c)
			{
			int i = (blockIdx.x + blockIdx.ygridDim.x) blockDim.x + threadIdx.x;

			@@ -39,6 +39,18 @@
			return options;
			}

			/*
			 * Parse the numeric value that follows the first space in each label.
			 *
			 * Each labels[i] is split in place: the first ' ' is overwritten with a
			 * terminator, so the string is truncated to the text before it, and the
			 * remainder is parsed with atof().
			 *
			 * labels - n writable, NUL-terminated strings
			 * n      - number of labels
			 *
			 * Returns a calloc'd array of n floats that the caller must free, or
			 * NULL if the allocation fails. A label with no space is left untouched
			 * and its value stays 0.
			 */
			float *get_regression_values(char **labels, int n)
			{
			    float *v = calloc(n, sizeof(float));
			    int i;
			    if(!v) return 0;
			    for(i = 0; i < n; ++i){
			        char *p = strchr(labels[i], ' ');
			        if(!p) continue;    /* no value present; keep the zero from calloc */
			        *p = 0;
			        v[i] = atof(p+1);
			    }
			    return v;
			}

			void train_classifier(char datacfg, char cfgfile, char *weightfile, int clear)
			{
			int nthreads = 8;
			@@ -85,6 +97,7 @@
			args.angle = net.angle;
			args.exposure = net.exposure;
			args.saturation = net.saturation;
			args.hue = net.hue;
			args.size = net.w;

			args.paths = paths;
			@@ -116,6 +129,7 @@
			printf("Loaded: %lf seconds\n", sec(clock()-time));
			time=clock();

			#ifdef OPENCV
			if(0){
			int u;
			for(u = 0; u < imgs; ++u){
			@@ -124,6 +138,7 @@
			cvWaitKey(0);
			}
			}
			#endif

			float loss = train_network(net, train);
			if(avg_loss == -1) avg_loss = loss;
			@@ -440,7 +455,7 @@

			char **labels = get_labels(label_list);
			list *plist = get_paths(valid_list);
			int scales[] = {192, 224, 288, 320, 352};
			int scales[] = {224, 288, 320, 352, 384};
			int nscales = sizeof(scales)/sizeof(scales[0]);

			char paths = (char )list_to_array(plist);
			@@ -484,6 +499,88 @@
			}
			}

			/*
			 * Debugging aid: classify one image (or images typed at a prompt) and
			 * dump the internals of one layer.
			 *
			 * datacfg    - data config; supplies "names" (or "labels") and "top"
			 * cfgfile    - network config passed to parse_network_cfg
			 * weightfile - optional weights file; skipped when NULL
			 * filename   - image to classify; when NULL, paths are read from stdin
			 *              until EOF
			 * layer_num  - index into net.layers of the layer to dump
			 *
			 * Each image is resized with resize_min(…, 256), cropped to 224x224
			 * around the centre and normalized with fixed per-channel mean/variance
			 * before prediction. For the chosen layer the function prints rolling
			 * mean/variance/scale (when present) and every output value, followed
			 * by the top predictions.
			 */
			void try_classifier(char *datacfg, char *cfgfile, char *weightfile, char *filename, int layer_num)
			{
			    network net = parse_network_cfg(cfgfile);
			    if(weightfile){
			        load_weights(&net, weightfile);
			    }
			    set_batch_network(&net, 1);
			    srand(2222222);

			    list *options = read_data_cfg(datacfg);

			    char *name_list = option_find_str(options, "names", 0);
			    if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
			    int top = option_find_int(options, "top", 1);

			    int i = 0;
			    char **names = get_labels(name_list);
			    clock_t time;
			    int *indexes = calloc(top, sizeof(int));
			    char buff[256];
			    char *input = buff;
			    while(1){
			        if(filename){
			            /* strncpy does not terminate on truncation; do it explicitly. */
			            strncpy(input, filename, sizeof(buff) - 1);
			            buff[sizeof(buff) - 1] = 0;
			        }else{
			            printf("Enter Image Path: ");
			            fflush(stdout);
			            input = fgets(input, 256, stdin);
			            if(!input) break;   /* EOF: leave the loop so cleanup still runs */
			            strtok(input, "\n");
			        }
			        image orig = load_image_color(input, 0, 0);
			        image r = resize_min(orig, 256);
			        image im = crop_image(r, (r.w - 224 - 1)/2 + 1, (r.h - 224 - 1)/2 + 1, 224, 224);
			        float mean[] = {0.48263312050943, 0.45230225481413, 0.40099074308742};
			        float std[] = {0.22590347483426, 0.22120921437787, 0.22103996251583};
			        float var[3];
			        var[0] = std[0]*std[0];
			        var[1] = std[1]*std[1];
			        var[2] = std[2]*std[2];

			        normalize_cpu(im.data, mean, var, 1, 3, im.w*im.h);

			        float *X = im.data;
			        time=clock();
			        float *predictions = network_predict(net, X);

			        layer l = net.layers[layer_num];
			        for(i = 0; i < l.c; ++i){
			            if(l.rolling_mean) printf("%f %f %f\n", l.rolling_mean[i], l.rolling_variance[i], l.scales[i]);
			        }
			#ifdef GPU
			        cuda_pull_array(l.output_gpu, l.output, l.outputs);
			#endif
			        for(i = 0; i < l.outputs; ++i){
			            printf("%f\n", l.output[i]);
			        }
			        /*

			           printf("\n\nWeights\n");
			           for(i = 0; i < l.n*l.size*l.size*l.c; ++i){
			           printf("%f\n", l.filters[i]);
			           }

			           printf("\n\nBiases\n");
			           for(i = 0; i < l.n; ++i){
			           printf("%f\n", l.biases[i]);
			           }
			         */

			        top_predictions(net, top, indexes);
			        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
			        for(i = 0; i < top; ++i){
			            int index = indexes[i];
			            printf("%s: %f\n", names[index], predictions[index]);
			        }
			        free_image(im);
			        /* Release the intermediates too; guard in case resize_min handed
			           back its input buffer unchanged (would otherwise double free). */
			        if(r.data != orig.data) free_image(r);
			        free_image(orig);
			        if (filename) break;
			    }
			    free(indexes);
			}


			void predict_classifier(char datacfg, char cfgfile, char weightfile, char filename)
			{
			network net = parse_network_cfg(cfgfile);
			@@ -649,6 +746,127 @@
			}


			void threat_classifier(char datacfg, char cfgfile, char weightfile, int cam_index, const char filename)
			{
			#ifdef OPENCV
			float threat = 0;
			float roll = .2;

			printf("Classifier Demo\n");
			network net = parse_network_cfg(cfgfile);
			if(weightfile){
			load_weights(&net, weightfile);
			}
			set_batch_network(&net, 1);
			list *options = read_data_cfg(datacfg);

			srand(2222222);
			CvCapture * cap;

			if(filename){
			cap = cvCaptureFromFile(filename);
			}else{
			cap = cvCaptureFromCAM(cam_index);
			}

			int top = option_find_int(options, "top", 1);

			char *name_list = option_find_str(options, "names", 0);
			char **names = get_labels(name_list);

			int *indexes = calloc(top, sizeof(int));

			if(!cap) error("Couldn't connect to webcam.\n");
			//cvNamedWindow("Threat", CV_WINDOW_NORMAL);
			//cvResizeWindow("Threat", 512, 512);
			float fps = 0;
			int i;

			int count = 0;

			while(1){
			++count;
			struct timeval tval_before, tval_after, tval_result;
			gettimeofday(&tval_before, NULL);

			image in = get_image_from_stream(cap);
			if(!in.data) break;
			image in_s = resize_image(in, net.w, net.h);

			image out = in;
			int x1 = out.w / 20;
			int y1 = out.h / 20;
			int x2 = 2*x1;
			int y2 = out.h - out.h/20;

			int border = .01*out.h;
			int h = y2 - y1 - 2*border;
			int w = x2 - x1 - 2*border;

			float *predictions = network_predict(net, in_s.data);
			float curr_threat = predictions[0] * 0 + predictions[1] * .6 + predictions[2];
			threat = roll * curr_threat + (1-roll) * threat;

			draw_box_width(out, x2 + border, y1 + .02h, x2 + .5 w, y1 + .02*h + border, border, 0,0,0);
			if(threat > .97) {
			draw_box_width(out, x2 + .5 * w + border,
			y1 + .02h - 2border,
			x2 + .5 * w + 6*border,
			y1 + .02h + 3border, 3*border, 1,0,0);
			}
			draw_box_width(out, x2 + .5 * w + border,
			y1 + .02h - 2border,
			x2 + .5 * w + 6*border,
			y1 + .02h + 3border, .5*border, 0,0,0);
			draw_box_width(out, x2 + border, y1 + .42h, x2 + .5 w, y1 + .42*h + border, border, 0,0,0);
			if(threat > .57) {
			draw_box_width(out, x2 + .5 * w + border,
			y1 + .42h - 2border,
			x2 + .5 * w + 6*border,
			y1 + .42h + 3border, 3*border, 1,1,0);
			}
			draw_box_width(out, x2 + .5 * w + border,
			y1 + .42h - 2border,
			x2 + .5 * w + 6*border,
			y1 + .42h + 3border, .5*border, 0,0,0);

			draw_box_width(out, x1, y1, x2, y2, border, 0,0,0);
			for(i = 0; i < threat * h ; ++i){
			float ratio = (float) i / h;
			float r = (ratio < .5) ? (2*(ratio)) : 1;
			float g = (ratio < .5) ? 1 : 1 - 2*(ratio - .5);
			draw_box_width(out, x1 + border, y2 - border - i, x2 - border, y2 - border - i, 1, r, g, 0);
			}
			top_predictions(net, top, indexes);
			char buff[256];
			sprintf(buff, "/home/pjreddie/tmp/threat_%06d", count);
			save_image(out, buff);

			printf("\033[2J");
			printf("\033[1;1H");
			printf("\nFPS:%.0f\n",fps);

			for(i = 0; i < top; ++i){
			int index = indexes[i];
			printf("%.1f%%: %s\n", predictions[index]*100, names[index]);
			}

			if(0){
			show_image(out, "Threat");
			cvWaitKey(10);
			}
			free_image(in_s);
			free_image(in);

			gettimeofday(&tval_after, NULL);
			timersub(&tval_after, &tval_before, &tval_result);
			float curr = 1000000.f/((long int)tval_result.tv_usec);
			fps = .9*fps + .1*curr;
			}
			#endif
			}


			void demo_classifier(char *datacfg, char *cfgfile, char *weightfile, int cam_index, const char *filename)
			{
			#ifdef OPENCV
			@@ -732,8 +950,10 @@
			char *layer_s = (argc > 7) ? argv[7]: 0;
			int layer = layer_s ? atoi(layer_s) : -1;
			if(0==strcmp(argv[2], "predict")) predict_classifier(data, cfg, weights, filename);
			else if(0==strcmp(argv[2], "try")) try_classifier(data, cfg, weights, filename, atoi(layer_s));
			else if(0==strcmp(argv[2], "train")) train_classifier(data, cfg, weights, clear);
			else if(0==strcmp(argv[2], "demo")) demo_classifier(data, cfg, weights, cam_index, filename);
			else if(0==strcmp(argv[2], "threat")) threat_classifier(data, cfg, weights, cam_index, filename);
			else if(0==strcmp(argv[2], "test")) test_classifier(data, cfg, weights, layer);
			else if(0==strcmp(argv[2], "label")) label_classifier(data, cfg, weights);
			else if(0==strcmp(argv[2], "valid")) validate_classifier_single(data, cfg, weights);

			@@ -25,6 +25,7 @@
			//char *train_images = "/home/pjreddie/data/voc/test/train.txt";
			//char *train_images = "/home/pjreddie/data/coco/train.txt";
			char *train_images = "data/coco.trainval.txt";
			//char *train_images = "data/bags.train.list";
			char *backup_directory = "/home/pjreddie/backup/";
			srand(time(0));
			data_seed = time(0);
			@@ -63,6 +64,11 @@
			args.d = &buffer;
			args.type = REGION_DATA;

			args.angle = net.angle;
			args.exposure = net.exposure;
			args.saturation = net.saturation;
			args.hue = net.hue;

			pthread_t load_thread = load_data_in_thread(args);
			clock_t time;
			//while(i*imgs < N*120){
			@@ -94,6 +100,11 @@
			sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);
			save_weights(net, buff);
			}
			if(i%100==0){
			char buff[256];
			sprintf(buff, "%s/%s.backup", backup_directory, base);
			save_weights(net, buff);
			}
			free_data(train);
			}
			char buff[256];

			@@ -16,13 +16,9 @@
			int ksize, int stride, int pad, float* data_im)
			{
			int c,h,w;
			int height_col = (height - ksize) / stride + 1;
			int width_col = (width - ksize) / stride + 1;
			if (pad){
			height_col = 1 + (height-1) / stride;
			width_col = 1 + (width-1) / stride;
			pad = ksize/2;
			}
			int height_col = (height + 2*pad - ksize) / stride + 1;
			int width_col = (width + 2*pad - ksize) / stride + 1;

			int channels_col = channels * ksize * ksize;
			for (c = 0; c < channels_col; ++c) {
			int w_offset = c % ksize;

			@@ -46,7 +46,6 @@
			int ksize, int stride, int pad, float *data_im){
			// We are going to launch channels * height_col * width_col kernels, each
			// kernel responsible for copying a single-channel grid.
			pad = pad ? ksize/2 : 0;
			int height_col = (height + 2 * pad - ksize) / stride + 1;
			int width_col = (width + 2 * pad - ksize) / stride + 1;
			int num_kernels = channels * height * width;

			@@ -17,7 +17,7 @@
			{
			int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
			if (i >= n) return;
			binary[i] = (x[i] > 0) ? 1 : -1;
			binary[i] = (x[i] >= 0) ? 1 : -1;
			}

			void binarize_gpu(float *x, int n, float *binary)
			@@ -60,6 +60,7 @@
			mean = mean / size;
			for(i = 0; i < size; ++i){
			binary[f*size + i] = (filters[f*size + i] > 0) ? mean : -mean;
			//binary[f*size + i] = filters[f*size + i];
			}
			}

			@@ -70,18 +70,12 @@

			int convolutional_out_height(convolutional_layer l)
			{
			int h = l.h;
			if (!l.pad) h -= l.size;
			else h -= 1;
			return h/l.stride + 1;
			return (l.h + 2*l.pad - l.size) / l.stride + 1;
			}

			int convolutional_out_width(convolutional_layer l)
			{
			int w = l.w;
			if (!l.pad) w -= l.size;
			else w -= 1;
			return w/l.stride + 1;
			return (l.w + 2*l.pad - l.size) / l.stride + 1;
			}

			image get_convolutional_image(convolutional_layer l)
			@@ -148,8 +142,7 @@
			cudnnSetTensor4dDescriptor(l->srcTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->c, l->h, l->w);
			cudnnSetTensor4dDescriptor(l->dstTensorDesc, CUDNN_TENSOR_NCHW, CUDNN_DATA_FLOAT, l->batch, l->out_c, l->out_h, l->out_w);
			cudnnSetFilter4dDescriptor(l->filterDesc, CUDNN_DATA_FLOAT, CUDNN_TENSOR_NCHW, l->n, l->c, l->size, l->size);
			int padding = l->pad ? l->size/2 : 0;
			cudnnSetConvolution2dDescriptor(l->convDesc, padding, padding, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
			cudnnSetConvolution2dDescriptor(l->convDesc, l->pad, l->pad, l->stride, l->stride, 1, 1, CUDNN_CROSS_CORRELATION);
			cudnnGetConvolutionForwardAlgorithm(cudnn_handle(),
			l->srcTensorDesc,
			l->filterDesc,
			@@ -178,7 +171,7 @@
			#endif
			#endif

			convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, int batch_normalize, int binary, int xnor)
			convolutional_layer make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int padding, ACTIVATION activation, int batch_normalize, int binary, int xnor)
			{
			int i;
			convolutional_layer l = {0};
			@@ -193,7 +186,7 @@
			l.batch = batch;
			l.stride = stride;
			l.size = size;
			l.pad = pad;
			l.pad = padding;
			l.batch_normalize = batch_normalize;

			l.filters = calloc(c*n*size*size, sizeof(float));

			@@ -98,6 +98,15 @@
			cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs);
			}

			/*
			 * qsort() comparator that orders floats by ascending absolute value.
			 *
			 * a, b: pointers to the two float elements being compared.
			 * Returns -1, 0 or 1 when |*a| is less than, equal to, or greater
			 * than |*b| respectively.
			 */
			int float_abs_compare (const void * a, const void * b)
			{
			    /* qsort passes element addresses, so read the values through a float pointer. */
			    float fa = *(const float*) a;
			    if(fa < 0) fa = -fa;
			    float fb = *(const float*) b;
			    if(fb < 0) fb = -fb;
			    /* Branch-free three-way compare; avoids the truncation of (int)(fa - fb). */
			    return (fa > fb) - (fa < fb);
			}

			void forward_cost_layer_gpu(cost_layer l, network_state state)
			{
			if (!state.truth) return;
			@@ -111,6 +120,16 @@
			l2_gpu(l.batch*l.inputs, state.input, state.truth, l.delta_gpu, l.output_gpu);
			}

			if(l.ratio){
			cuda_pull_array(l.delta_gpu, l.delta, l.batch*l.inputs);
			qsort(l.delta, l.batch*l.inputs, sizeof(float), float_abs_compare);
			int n = (1-l.ratio) * l.batch*l.inputs;
			float thresh = l.delta[n];
			thresh = 0;
			printf("%f\n", thresh);
			supp_ongpu(l.batch*l.inputs, thresh, l.delta_gpu, 1);
			}

			cuda_pull_array(l.output_gpu, l.output, l.batch*l.inputs);
			l.cost[0] = sum_array(l.output, l.batch*l.inputs);
			}

			@@ -13,7 +13,6 @@
			#endif

			extern void run_voxel(int argc, char **argv);
			extern void run_imagenet(int argc, char **argv);
			extern void run_yolo(int argc, char **argv);
			extern void run_detector(int argc, char **argv);
			extern void run_coco(int argc, char **argv);
			@@ -327,9 +326,7 @@
			}
			#endif

			if(0==strcmp(argv[1], "imagenet")){
			run_imagenet(argc, argv);
			} else if (0 == strcmp(argv[1], "average")){
			if (0 == strcmp(argv[1], "average")){
			average(argc, argv);
			} else if (0 == strcmp(argv[1], "yolo")){
			run_yolo(argc, argv);

			@@ -100,7 +100,7 @@
			return X;
			}

			matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float exposure, float saturation)
			matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure)
			{
			int i;
			matrix X;
			@@ -113,10 +113,7 @@
			image crop = random_augment_image(im, angle, min, max, size);
			int flip = rand_r(&data_seed)%2;
			if (flip) flip_image(crop);
			float exp = rand_uniform(1./exposure, exposure);
			float sat = rand_uniform(1./saturation, saturation);
			exposure_image(crop, exp);
			exposure_image(crop, sat);
			random_distort_image(crop, hue, saturation, exposure);

			/*
			show_image(im, "orig");
			@@ -241,6 +238,7 @@
			labelpath = find_replace(labelpath, "JPEGImages", "labels");

			labelpath = find_replace(labelpath, ".jpg", ".txt");
			labelpath = find_replace(labelpath, ".png", ".txt");
			labelpath = find_replace(labelpath, ".JPG", ".txt");
			labelpath = find_replace(labelpath, ".JPEG", ".txt");
			int count = 0;
			@@ -287,6 +285,7 @@
			labelpath = find_replace(labelpath, "JPEGImages", "labels");

			labelpath = find_replace(labelpath, ".jpg", ".txt");
			labelpath = find_replace(labelpath, ".png", ".txt");
			labelpath = find_replace(labelpath, ".JPG", ".txt");
			labelpath = find_replace(labelpath, ".JPEG", ".txt");
			int count = 0;
			@@ -443,7 +442,7 @@
			}
			}

			data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter)
			data load_data_region(int n, char **paths, int m, int w, int h, int size, int classes, float jitter, float hue, float saturation, float exposure)
			{
			char **random_paths = get_random_paths(paths, n, m);
			int i;
			@@ -485,6 +484,7 @@

			image sized = resize_image(cropped, w, h);
			if(flip) flip_image(sized);
			random_distort_image(sized, hue, saturation, exposure);
			d.X.vals[i] = sized.data;

			fill_truth_region(random_paths[i], d.y.vals[i], classes, size, flip, dx, dy, 1./sx, 1./sy);
			@@ -611,7 +611,7 @@
			return d;
			}

			data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter)
			data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure)
			{
			char **random_paths = get_random_paths(paths, n, m);
			int i;
			@@ -651,6 +651,7 @@

			image sized = resize_image(cropped, w, h);
			if(flip) flip_image(sized);
			random_distort_image(sized, hue, saturation, exposure);
			d.X.vals[i] = sized.data;

			fill_truth_detection(random_paths[i], boxes, d.y.vals[i], classes, flip, dx, dy, 1./sx, 1./sy);
			@@ -679,17 +680,17 @@
			if (a.type == OLD_CLASSIFICATION_DATA){
			*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
			} else if (a.type == CLASSIFICATION_DATA){
			*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
			*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
			} else if (a.type == SUPER_DATA){
			*a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale);
			} else if (a.type == STUDY_DATA){
			*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
			*a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
			} else if (a.type == WRITING_DATA){
			*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
			} else if (a.type == REGION_DATA){
			*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
			*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
			} else if (a.type == DETECTION_DATA){
			*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
			*a.d = load_data_detection(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter, a.hue, a.saturation, a.exposure);
			} else if (a.type == SWAG_DATA){
			*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
			} else if (a.type == COMPARE_DATA){
			@@ -698,7 +699,7 @@
			*(a.im) = load_image_color(a.path, 0, 0);
			*(a.resized) = resize_image(*(a.im), a.w, a.h);
			} else if (a.type == TAG_DATA){
			*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.exposure, a.saturation);
			*a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure);
			//*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
			}
			free(ptr);
			@@ -740,13 +741,13 @@
			return d;
			}

			data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
			data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
			{
			data d = {0};
			d.indexes = calloc(n, sizeof(int));
			if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes);
			d.shallow = 0;
			d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
			d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
			d.y = load_labels_paths(paths, n, labels, k);
			if(m) free(paths);
			return d;
			@@ -782,25 +783,25 @@
			return d;
			}

			data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation)
			data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
			{
			if(m) paths = get_random_paths(paths, n, m);
			data d = {0};
			d.shallow = 0;
			d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
			d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
			d.y = load_labels_paths(paths, n, labels, k);
			if(m) free(paths);
			return d;
			}

			data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation)
			data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure)
			{
			if(m) paths = get_random_paths(paths, n, m);
			data d = {0};
			d.w = size;
			d.h = size;
			d.shallow = 0;
			d.X = load_image_augment_paths(paths, n, min, max, size, angle, exposure, saturation);
			d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure);
			d.y = load_tags_paths(paths, n, k);
			if(m) free(paths);
			return d;

			@@ -54,6 +54,7 @@
			float angle;
			float saturation;
			float exposure;
			float hue;
			data *d;
			image *im;
			image *resized;
			@@ -74,11 +75,12 @@
			data load_data_captcha(char **paths, int n, int m, int k, int w, int h);
			data load_data_captcha_encode(char **paths, int n, int m, int w, int h);
			data load_data(char **paths, int n, int m, char **labels, int k, int w, int h);
			data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter);
			data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float exposure, float saturation);
			data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
			data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure);
			data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
			matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure);
			data load_data_super(char **paths, int n, int m, int w, int h, int scale);
			data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float exposure, float saturation);
			data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
			data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure);
			data load_go(char *filename);

			box_label *read_boxes(char *filename, int *n);

			@@ -8,7 +8,7 @@
			#include "demo.h"
			#include <sys/time.h>

			#define FRAMES 1
			#define FRAMES 3

			#ifdef OPENCV
			#include "opencv2/highgui/highgui_c.h"
			@@ -48,7 +48,7 @@

			void *detect_in_thread(void *ptr)
			{
			float nms = .4;
			float nms = .1;

			detection_layer l = net.layers[net.n-1];
			float *X = det_s.data;
			@@ -153,13 +153,19 @@
			if(pthread_create(&fetch_thread, 0, fetch_in_thread, 0)) error("Thread creation failed");
			if(pthread_create(&detect_thread, 0, detect_in_thread, 0)) error("Thread creation failed");

			show_image(disp, "Demo");
			int c = cvWaitKey(1);
			if (c == 10){
			if(frame_skip == 0) frame_skip = 60;
			else if(frame_skip == 4) frame_skip = 0;
			else if(frame_skip == 60) frame_skip = 4;
			else frame_skip = 0;
			if(1){
			show_image(disp, "Demo");
			int c = cvWaitKey(1);
			if (c == 10){
			if(frame_skip == 0) frame_skip = 60;
			else if(frame_skip == 4) frame_skip = 0;
			else if(frame_skip == 60) frame_skip = 4;
			else frame_skip = 0;
			}
			}else{
			char buff[256];
			sprintf(buff, "/home/pjreddie/tmp/bag_%07d", count);
			save_image(disp, buff);
			}

			pthread_join(fetch_thread, 0);

			@@ -22,6 +22,8 @@
			l.coords = coords;
			l.rescore = rescore;
			l.side = side;
			l.w = side;
			l.h = side;
			assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs);
			l.cost = calloc(1, sizeof(float));
			l.outputs = l.inputs;
			@@ -44,6 +46,7 @@
			int locations = l.side*l.side;
			int i,j;
			memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
			//if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1);
			int b;
			if (l.softmax){
			for(b = 0; b < l.batch; ++b){
			@@ -204,6 +207,7 @@


			printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count);
			//if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0);
			}
			}

			@@ -51,6 +51,11 @@
			args.d = &buffer;
			args.type = DETECTION_DATA;

			args.angle = net.angle;
			args.exposure = net.exposure;
			args.saturation = net.saturation;
			args.hue = net.hue;

			pthread_t load_thread = load_data_in_thread(args);
			clock_t time;
			//while(i*imgs < N*120){

			@@ -18,13 +18,9 @@
			int ksize, int stride, int pad, float* data_col)
			{
			int c,h,w;
			int height_col = (height - ksize) / stride + 1;
			int width_col = (width - ksize) / stride + 1;
			if (pad){
			height_col = 1 + (height-1) / stride;
			width_col = 1 + (width-1) / stride;
			pad = ksize/2;
			}
			int height_col = (height + 2*pad - ksize) / stride + 1;
			int width_col = (width + 2*pad - ksize) / stride + 1;

			int channels_col = channels * ksize * ksize;
			for (c = 0; c < channels_col; ++c) {
			int w_offset = c % ksize;

			@@ -33,8 +33,12 @@
			for (int j = 0; j < ksize; ++j) {
			int h = h_in + i;
			int w = w_in + j;

			*data_col_ptr = (h >= 0 && w >= 0 && h < height && w < width) ?
			data_im_ptr[i * width + j] : 0;

			//*data_col_ptr = data_im_ptr[ii * width + jj];

			data_col_ptr += height_col * width_col;
			}
			}
			@@ -46,7 +50,6 @@
			int ksize, int stride, int pad, float *data_col){
			// We are going to launch channels * height_col * width_col kernels, each
			// kernel responsible for copying a single-channel grid.
			pad = pad ? ksize/2 : 0;
			int height_col = (height + 2 * pad - ksize) / stride + 1;
			int width_col = (width + 2 * pad - ksize) / stride + 1;
			int num_kernels = channels * height_col * width_col;
			@@ -56,93 +59,3 @@
			stride, height_col,
			width_col, data_col);
			}
			/*
			__global__ void im2col_pad_kernel(float *im,
			int channels, int height, int width,
			int ksize, int stride, float *data_col)
			{
			int c,h,w;
			int height_col = 1 + (height-1) / stride;
			int width_col = 1 + (width-1) / stride;
			int channels_col = channels * ksize * ksize;

			int pad = ksize/2;

			int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
			int col_size = height_col*width_col*channels_col;
			if (id >= col_size) return;

			int col_index = id;
			w = id % width_col;
			id /= width_col;
			h = id % height_col;
			id /= height_col;
			c = id % channels_col;
			id /= channels_col;

			int w_offset = c % ksize;
			int h_offset = (c / ksize) % ksize;
			int im_channel = c / ksize / ksize;
			int im_row = h_offset + h * stride - pad;
			int im_col = w_offset + w * stride - pad;

			int im_index = im_col + width*(im_row + height*im_channel);
			float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];

			data_col[col_index] = val;
			}

			__global__ void im2col_nopad_kernel(float *im,
			int channels, int height, int width,
			int ksize, int stride, float *data_col)
			{
			int c,h,w;
			int height_col = (height - ksize) / stride + 1;
			int width_col = (width - ksize) / stride + 1;
			int channels_col = channels * ksize * ksize;

			int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
			int col_size = height_col*width_col*channels_col;
			if (id >= col_size) return;

			int col_index = id;
			w = id % width_col;
			id /= width_col;
			h = id % height_col;
			id /= height_col;
			c = id % channels_col;
			id /= channels_col;

			int w_offset = c % ksize;
			int h_offset = (c / ksize) % ksize;
			int im_channel = c / ksize / ksize;
			int im_row = h_offset + h * stride;
			int im_col = w_offset + w * stride;

			int im_index = im_col + width*(im_row + height*im_channel);
			float val = (im_row < 0 || im_col < 0 || im_row >= height || im_col >= width) ? 0 : im[im_index];

			data_col[col_index] = val;
			}

			extern "C" void im2col_ongpu(float *im,
			int channels, int height, int width,
			int ksize, int stride, int pad, float *data_col)
			{

			int height_col = (height - ksize) / stride + 1;
			int width_col = (width - ksize) / stride + 1;
			int channels_col = channels * ksize * ksize;

			if (pad){
			height_col = 1 + (height-1) / stride;
			width_col = 1 + (width-1) / stride;
			}

			size_t n = channels_col*height_col*width_col;

			if(pad)im2col_pad_kernel<<<cuda_gridsize(n),BLOCK>>>(im, channels, height, width, ksize, stride, data_col);
			else im2col_nopad_kernel<<<cuda_gridsize(n),BLOCK>>>(im, channels, height, width, ksize, stride, data_col);
			check_error(cudaPeekAtLastError());
			}
			*/

			@@ -1,6 +1,7 @@
			#include "image.h"
			#include "utils.h"
			#include "blas.h"
			#include "cuda.h"
			#include <stdio.h>
			#include <math.h>

			@@ -248,6 +249,26 @@

			/*
			 * Rescales every value of p.data in place using one global range:
			 * each element becomes (value - min) / (max - min), where min and max
			 * are taken over all p.h*p.w*p.c elements.
			 *
			 * When the observed range is smaller than 1e-9 (for example a constant
			 * image), min/max fall back to 0/1 so the division is well defined and
			 * the data is left unchanged.
			 */
			void normalize_image(image p)
			{
			    int i;
			    /* Sentinels: any value inside (-999999, 9999999) replaces them on the first pass. */
			    float min = 9999999;
			    float max = -999999;

			    for(i = 0; i < p.h*p.w*p.c; ++i){
			        float v = p.data[i];
			        if(v < min) min = v;
			        if(v > max) max = v;
			    }
			    /* Guard against dividing by a (near-)zero range. */
			    if(max - min < .000000001){
			        min = 0;
			        max = 1;
			    }
			    for(i = 0; i < p.c*p.w*p.h; ++i){
			        p.data[i] = (p.data[i] - min)/(max-min);
			    }
			}

			void normalize_image2(image p)
			{
			float *min = calloc(p.c, sizeof(float));
			float *max = calloc(p.c, sizeof(float));
			int i,j;
			@@ -320,7 +341,6 @@
			}
			free_image(copy);
			if(0){
			//if(disp->height < 448 || disp->width < 448 || disp->height > 1000){
			int w = 448;
			int h = w*p.h/p.w;
			if(h > 1000){
			@@ -334,202 +354,202 @@
			}
			cvShowImage(buff, disp);
			cvReleaseImage(&disp);
			}
			}
			#endif

			void show_image(image p, const char *name)
			{
			void show_image(image p, const char *name)
			{
			#ifdef OPENCV
			show_image_cv(p, name);
			show_image_cv(p, name);
			#else
			fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
			save_image(p, name);
			fprintf(stderr, "Not compiled with OpenCV, saving to %s.png instead\n", name);
			save_image(p, name);
			#endif
			}
			}

			#ifdef OPENCV
			image get_image_from_stream(CvCapture *cap)
			{
			IplImage* src = cvQueryFrame(cap);
			if (!src) return make_empty_image(0,0,0);
			image im = ipl_to_image(src);
			rgbgr_image(im);
			return im;
			}
			image get_image_from_stream(CvCapture *cap)
			{
			IplImage* src = cvQueryFrame(cap);
			if (!src) return make_empty_image(0,0,0);
			image im = ipl_to_image(src);
			rgbgr_image(im);
			return im;
			}
			#endif

			#ifdef OPENCV
			void save_image_jpg(image p, const char *name)
			{
			image copy = copy_image(p);
			rgbgr_image(copy);
			int x,y,k;
			void save_image_jpg(image p, const char *name)
			{
			image copy = copy_image(p);
			if(p.c == 3) rgbgr_image(copy);
			int x,y,k;

			char buff[256];
			sprintf(buff, "%s.jpg", name);
			char buff[256];
			sprintf(buff, "%s.jpg", name);

			IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
			int step = disp->widthStep;
			for(y = 0; y < p.h; ++y){
			for(x = 0; x < p.w; ++x){
			for(k= 0; k < p.c; ++k){
			disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
			}
			IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c);
			int step = disp->widthStep;
			for(y = 0; y < p.h; ++y){
			for(x = 0; x < p.w; ++x){
			for(k= 0; k < p.c; ++k){
			disp->imageData[y*step + x*p.c + k] = (unsigned char)(get_pixel(copy,x,y,k)*255);
			}
			}
			cvSaveImage(buff, disp,0);
			cvReleaseImage(&disp);
			free_image(copy);
			}
			cvSaveImage(buff, disp,0);
			cvReleaseImage(&disp);
			free_image(copy);
			}
			#endif

			void save_image(image im, const char *name)
			{
			#ifdef OPENCV
			save_image_jpg(im, name);
			#else
			char buff[256];
			//sprintf(buff, "%s (%d)", name, windows);
			sprintf(buff, "%s.png", name);
			unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
			int i,k;
			for(k = 0; k < im.c; ++k){
			for(i = 0; i < im.w*im.h; ++i){
			data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]);
			void save_image(image im, const char *name)
			{
			#ifdef OPENCV
			save_image_jpg(im, name);
			#else
			char buff[256];
			//sprintf(buff, "%s (%d)", name, windows);
			sprintf(buff, "%s.png", name);
			unsigned char *data = calloc(im.w*im.h*im.c, sizeof(char));
			int i,k;
			for(k = 0; k < im.c; ++k){
			for(i = 0; i < im.w*im.h; ++i){
			data[i*im.c+k] = (unsigned char) (255*im.data[i + k*im.w*im.h]);
			}
			}
			int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
			free(data);
			if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
			#endif
			}


			void show_image_layers(image p, char *name)
			{
			int i;
			char buff[256];
			for(i = 0; i < p.c; ++i){
			sprintf(buff, "%s - Layer %d", name, i);
			image layer = get_image_layer(p, i);
			show_image(layer, buff);
			free_image(layer);
			}
			}

			void show_image_collapsed(image p, char *name)
			{
			image c = collapse_image_layers(p, 1);
			show_image(c, name);
			free_image(c);
			}

			image make_empty_image(int w, int h, int c)
			{
			image out;
			out.data = 0;
			out.h = h;
			out.w = w;
			out.c = c;
			return out;
			}

			image make_image(int w, int h, int c)
			{
			image out = make_empty_image(w,h,c);
			out.data = calloc(h*w*c, sizeof(float));
			return out;
			}

			image make_random_image(int w, int h, int c)
			{
			image out = make_empty_image(w,h,c);
			out.data = calloc(h*w*c, sizeof(float));
			int i;
			for(i = 0; i < w*h*c; ++i){
			out.data[i] = (rand_normal() * .25) + .5;
			}
			return out;
			}

			image float_to_image(int w, int h, int c, float *data)
			{
			image out = make_empty_image(w,h,c);
			out.data = data;
			return out;
			}

			image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
			{
			int x, y, c;
			float cx = im.w/2.;
			float cy = im.h/2.;
			image rot = make_image(w, h, im.c);
			for(c = 0; c < im.c; ++c){
			for(y = 0; y < h; ++y){
			for(x = 0; x < w; ++x){
			float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx;
			float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy;
			float val = bilinear_interpolate(im, rx, ry, c);
			set_pixel(rot, x, y, c, val);
			}
			}
			int success = stbi_write_png(buff, im.w, im.h, im.c, data, im.w*im.c);
			free(data);
			if(!success) fprintf(stderr, "Failed to write image %s\n", buff);
			#endif
			}
			return rot;
			}


			void show_image_layers(image p, char *name)
			{
			int i;
			char buff[256];
			for(i = 0; i < p.c; ++i){
			sprintf(buff, "%s - Layer %d", name, i);
			image layer = get_image_layer(p, i);
			show_image(layer, buff);
			free_image(layer);
			image rotate_image(image im, float rad)
			{
			int x, y, c;
			float cx = im.w/2.;
			float cy = im.h/2.;
			image rot = make_image(im.w, im.h, im.c);
			for(c = 0; c < im.c; ++c){
			for(y = 0; y < im.h; ++y){
			for(x = 0; x < im.w; ++x){
			float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx;
			float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy;
			float val = bilinear_interpolate(im, rx, ry, c);
			set_pixel(rot, x, y, c, val);
			}
			}
			}
			return rot;
			}

			void show_image_collapsed(image p, char *name)
			{
			image c = collapse_image_layers(p, 1);
			show_image(c, name);
			free_image(c);
			}
			void translate_image(image m, float s)
			{
			int i;
			for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s;
			}

			image make_empty_image(int w, int h, int c)
			{
			image out;
			out.data = 0;
			out.h = h;
			out.w = w;
			out.c = c;
			return out;
			}
			void scale_image(image m, float s)
			{
			int i;
			for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s;
			}

			image make_image(int w, int h, int c)
			{
			image out = make_empty_image(w,h,c);
			out.data = calloc(h*w*c, sizeof(float));
			return out;
			}

			image make_random_image(int w, int h, int c)
			{
			image out = make_empty_image(w,h,c);
			out.data = calloc(h*w*c, sizeof(float));
			int i;
			for(i = 0; i < w*h*c; ++i){
			out.data[i] = (rand_normal() * .25) + .5;
			}
			return out;
			}

			image float_to_image(int w, int h, int c, float *data)
			{
			image out = make_empty_image(w,h,c);
			out.data = data;
			return out;
			}

			image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy)
			{
			int x, y, c;
			float cx = im.w/2.;
			float cy = im.h/2.;
			image rot = make_image(w, h, im.c);
			for(c = 0; c < im.c; ++c){
			for(y = 0; y < h; ++y){
			for(x = 0; x < w; ++x){
			float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx;
			float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy;
			float val = bilinear_interpolate(im, rx, ry, c);
			set_pixel(rot, x, y, c, val);
			image crop_image(image im, int dx, int dy, int w, int h)
			{
			image cropped = make_image(w, h, im.c);
			int i, j, k;
			for(k = 0; k < im.c; ++k){
			for(j = 0; j < h; ++j){
			for(i = 0; i < w; ++i){
			int r = j + dy;
			int c = i + dx;
			float val = 0;
			r = constrain_int(r, 0, im.h-1);
			c = constrain_int(c, 0, im.w-1);
			if (r >= 0 && r < im.h && c >= 0 && c < im.w) {
			val = get_pixel(im, c, r, k);
			}
			set_pixel(cropped, i, j, k, val);
			}
			}
			return rot;
			}

			image rotate_image(image im, float rad)
			{
			int x, y, c;
			float cx = im.w/2.;
			float cy = im.h/2.;
			image rot = make_image(im.w, im.h, im.c);
			for(c = 0; c < im.c; ++c){
			for(y = 0; y < im.h; ++y){
			for(x = 0; x < im.w; ++x){
			float rx = cos(rad)*(x-cx) - sin(rad)*(y-cy) + cx;
			float ry = sin(rad)*(x-cx) + cos(rad)*(y-cy) + cy;
			float val = bilinear_interpolate(im, rx, ry, c);
			set_pixel(rot, x, y, c, val);
			}
			}
			}
			return rot;
			}

			void translate_image(image m, float s)
			{
			int i;
			for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] += s;
			}

			void scale_image(image m, float s)
			{
			int i;
			for(i = 0; i < m.h*m.w*m.c; ++i) m.data[i] *= s;
			}

			image crop_image(image im, int dx, int dy, int w, int h)
			{
			image cropped = make_image(w, h, im.c);
			int i, j, k;
			for(k = 0; k < im.c; ++k){
			for(j = 0; j < h; ++j){
			for(i = 0; i < w; ++i){
			int r = j + dy;
			int c = i + dx;
			float val = 0;
			r = constrain_int(r, 0, im.h-1);
			c = constrain_int(c, 0, im.w-1);
			if (r >= 0 && r < im.h && c >= 0 && c < im.w) {
			val = get_pixel(im, c, r, k);
			}
			set_pixel(cropped, i, j, k, val);
			}
			}
			}
			return cropped;
			}
			return cropped;
			}

			int best_3d_shift_r(image a, image b, int min, int max)
			{
			@@ -666,7 +686,7 @@
			v = max;
			if(max == 0){
			s = 0;
			h = -1;
			h = 0;
			}else{
			s = delta/max;
			if(r == max){
			@@ -677,6 +697,7 @@
			h = 4 + (r - g) / delta;
			}
			if (h < 0) h += 6;
			h = h/6.;
			}
			set_pixel(im, i, j, 0, h);
			set_pixel(im, i, j, 1, s);
			@@ -694,7 +715,7 @@
			float f, p, q, t;
			for(j = 0; j < im.h; ++j){
			for(i = 0; i < im.w; ++i){
			h = get_pixel(im, i , j, 0);
			h = 6 * get_pixel(im, i , j, 0);
			s = get_pixel(im, i , j, 1);
			v = get_pixel(im, i , j, 2);
			if (s == 0) {
			@@ -781,6 +802,18 @@
			}
			}

			/*
			 * Add `v` to every pixel of channel `c` of `im`, in place.
			 * Pixels are read with get_pixel() and written back with set_pixel().
			 */
			void translate_image_channel(image im, int c, float v)
			{
			    int row, col;
			    for(row = 0; row < im.h; ++row){
			        for(col = 0; col < im.w; ++col){
			            float shifted = get_pixel(im, col, row, c) + v;
			            set_pixel(im, col, row, c, shifted);
			        }
			    }
			}

			image binarize_image(image im)
			{
			image c = copy_image(im);
			@@ -800,6 +833,19 @@
			constrain_image(im);
			}

			/*
			 * Shift the hue of `im` by `hue`, in place.
			 *
			 * The image is converted with rgb_to_hsv(), `hue` is added to the first
			 * im.w*im.h elements of im.data (the first channel), each value is
			 * wrapped once back toward [0, 1], and the image is converted back with
			 * hsv_to_rgb() followed by constrain_image().
			 */
			void hue_image(image im, float hue)
			{
			    int idx;
			    int plane = im.w*im.h;

			    rgb_to_hsv(im);
			    for(idx = 0; idx < plane; ++idx){
			        im.data[idx] += hue;
			        /* A single wrap step: a value above 1 drops by 1 (and so cannot
			         * end up negative), a value below 0 rises by 1. */
			        if (im.data[idx] > 1) {
			            im.data[idx] -= 1;
			        } else if (im.data[idx] < 0) {
			            im.data[idx] += 1;
			        }
			    }
			    hsv_to_rgb(im);
			    constrain_image(im);
			}

			void exposure_image(image im, float sat)
			{
			rgb_to_hsv(im);
			@@ -808,6 +854,29 @@
			constrain_image(im);
			}

			/*
			 * Apply a combined hue / saturation / value distortion to `im`, in place.
			 *
			 * After rgb_to_hsv(), channel 1 is scaled by `sat` and channel 2 by `val`
			 * through scale_image_channel(); `hue` is then added to the first
			 * im.w*im.h elements of im.data (the first channel) with a single wrap
			 * step back toward [0, 1]. Finally the image goes through hsv_to_rgb()
			 * and constrain_image().
			 */
			void distort_image(image im, float hue, float sat, float val)
			{
			    int idx;
			    int plane = im.w*im.h;

			    rgb_to_hsv(im);
			    scale_image_channel(im, 1, sat);
			    scale_image_channel(im, 2, val);
			    for(idx = 0; idx < plane; ++idx){
			        im.data[idx] += hue;
			        /* One wrap step only; the two cases are mutually exclusive. */
			        if (im.data[idx] > 1) {
			            im.data[idx] -= 1;
			        } else if (im.data[idx] < 0) {
			            im.data[idx] += 1;
			        }
			    }
			    hsv_to_rgb(im);
			    constrain_image(im);
			}

			/*
			 * Distort `im` in place with randomly drawn parameters.
			 *
			 * The hue shift comes from rand_uniform(-hue, hue); the saturation and
			 * exposure factors come from rand_scale(saturation) and
			 * rand_scale(exposure). The three values are handed to distort_image().
			 */
			void random_distort_image(image im, float hue, float saturation, float exposure)
			{
			    /* Keep the draws as separate statements in this order (hue,
			     * saturation, exposure) so the sequence of random calls is fixed. */
			    float hue_shift = rand_uniform(-hue, hue);
			    float sat_factor = rand_scale(saturation);
			    float exp_factor = rand_scale(exposure);

			    distort_image(im, hue_shift, sat_factor, exp_factor);
			}

			void saturate_exposure_image(image im, float sat, float exposure)
			{
			rgb_to_hsv(im);
			@@ -876,7 +945,6 @@
			return resized;
			}

			#include "cuda.h"

			void test_resize(char *filename)
			{
			@@ -885,59 +953,40 @@
			printf("L2 Norm: %f\n", mag);
			image gray = grayscale_image(im);

			image sat2 = copy_image(im);
			saturate_image(sat2, 2);
			image c1 = copy_image(im);
			image c2 = copy_image(im);
			image c3 = copy_image(im);
			image c4 = copy_image(im);
			distort_image(c1, .1, 1.5, 1.5);
			distort_image(c2, -.1, .66666, .66666);
			distort_image(c3, .1, 1.5, .66666);
			distort_image(c4, .1, .66666, 1.5);

			image sat5 = copy_image(im);
			saturate_image(sat5, .5);

			image exp2 = copy_image(im);
			exposure_image(exp2, 2);

			image exp5 = copy_image(im);
			exposure_image(exp5, .5);

			image bin = binarize_image(im);

			/*
			#ifdef GPU
			image r = resize_image(im, im.w, im.h);
			image black = make_image(im.w2 + 3, im.h2 + 3, 9);
			image black2 = make_image(im.w, im.h, 3);

			float r_gpu = cuda_make_array(r.data, r.wr.h*r.c);
			float black_gpu = cuda_make_array(black.data, black.wblack.h*black.c);
			float black2_gpu = cuda_make_array(black2.data, black2.wblack2.h*black2.c);
			shortcut_gpu(3, r.w, r.h, 1, r_gpu, black.w, black.h, 3, black_gpu);
			//flip_image(r);
			//shortcut_gpu(3, r.w, r.h, 1, r.data, black.w, black.h, 3, black.data);

			shortcut_gpu(3, black.w, black.h, 3, black_gpu, black2.w, black2.h, 1, black2_gpu);
			cuda_pull_array(black_gpu, black.data, black.wblack.hblack.c);
			cuda_pull_array(black2_gpu, black2.data, black2.wblack2.hblack2.c);
			show_image_layers(black, "Black");
			show_image(black2, "Recreate");
			#endif
			*/
			image rot = rotate_crop_image(im, -.2618, 1, im.w/2, im.h/2, 0, 0);
			image rot3 = rotate_crop_image(im, -.2618, 2, im.w, im.h, im.w/2, 0);
			image rot2 = rotate_crop_image(im, -.2618, 1, im.w, im.h, 0, 0);
			show_image(rot, "Rotated");
			show_image(rot2, "base");

			show_image(rot3, "Rotated2");

			/*
			show_image(im, "Original");
			show_image(bin, "Binary");
			show_image(gray, "Gray");
			show_image(sat2, "Saturation-2");
			show_image(sat5, "Saturation-.5");
			show_image(exp2, "Exposure-2");
			show_image(exp5, "Exposure-.5");
			*/
			show_image(c1, "C1");
			show_image(c2, "C2");
			show_image(c3, "C3");
			show_image(c4, "C4");
			#ifdef OPENCV
			cvWaitKey(0);
			while(1){
			float exposure = 1.15;
			float saturation = 1.15;
			float hue = .05;

			image c = copy_image(im);

			float dexp = rand_scale(exposure);
			float dsat = rand_scale(saturation);
			float dhue = rand_uniform(-hue, hue);

			distort_image(c, dhue, dsat, dexp);
			show_image(c, "rand");
			printf("%f %f %f\n", dhue, dsat, dexp);
			free_image(c);
			cvWaitKey(0);
			}
			#endif
			}

			@@ -1180,10 +1229,8 @@
			image sized = resize_image(m, w, h);
			*/
			normalize_image(m);
			image sized = resize_image(m, m.w, m.h);
			save_image(sized, window);
			show_image(sized, window);
			free_image(sized);
			save_image(m, window);
			show_image(m, window);
			free_image(m);
			}

			@@ -32,6 +32,7 @@
			image crop_image(image im, int dx, int dy, int w, int h);
			image random_crop_image(image im, int w, int h);
			image random_augment_image(image im, float angle, int low, int high, int size);
			void random_distort_image(image im, float hue, float saturation, float exposure);
			image resize_image(image im, int w, int h);
			image resize_min(image im, int min);
			void translate_image(image m, float s);
			@@ -41,6 +42,7 @@
			void embed_image(image source, image dest, int dx, int dy);
			void saturate_image(image im, float sat);
			void exposure_image(image im, float sat);
			void distort_image(image im, float hue, float sat, float val);
			void saturate_exposure_image(image im, float sat, float exposure);
			void hsv_to_rgb(image im);
			void rgbgr_image(image im);

			@@ -72,6 +72,7 @@
			float saturation;
			float exposure;
			float shift;
			float ratio;
			int softmax;
			int classes;
			int coords;
			@@ -82,6 +83,7 @@
			int joint;
			int noadjust;
			int reorg;
			int log;

			float alpha;
			float beta;

			@@ -18,7 +18,7 @@
			return float_to_image(w,h,c,l.delta);
			}

			maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride)
			maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding)
			{
			fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d size, %d stride\n", h,w,c,size,stride);
			maxpool_layer l = {0};
			@@ -27,8 +27,9 @@
			l.h = h;
			l.w = w;
			l.c = c;
			l.out_w = (w-1)/stride + 1;
			l.out_h = (h-1)/stride + 1;
			l.pad = padding;
			l.out_w = (w + 2*padding - size + 1)/stride + 1;
			l.out_h = (h + 2*padding - size + 1)/stride + 1;
			l.out_c = c;
			l.outputs = l.out_h * l.out_w * l.out_c;
			l.inputs = hwc;
			@@ -48,13 +49,12 @@

			void resize_maxpool_layer(maxpool_layer *l, int w, int h)
			{
			int stride = l->stride;
			l->h = h;
			l->w = w;
			l->inputs = hwl->c;

			l->out_w = (w-1)/stride + 1;
			l->out_h = (h-1)/stride + 1;
			l->out_w = (w + 2*l->pad - l->size + 1)/l->stride + 1;
			l->out_h = (h + 2*l->pad - l->size + 1)/l->stride + 1;
			l->outputs = l->out_w * l->out_h * l->c;
			int output_size = l->outputs * l->batch;

			@@ -75,11 +75,11 @@
			void forward_maxpool_layer(const maxpool_layer l, network_state state)
			{
			int b,i,j,k,m,n;
			int w_offset = (-l.size-1)/2 + 1;
			int h_offset = (-l.size-1)/2 + 1;
			int w_offset = -l.pad;
			int h_offset = -l.pad;

			int h = (l.h-1)/l.stride + 1;
			int w = (l.w-1)/l.stride + 1;
			int h = l.out_h;
			int w = l.out_w;
			int c = l.c;

			for(b = 0; b < l.batch; ++b){
			@@ -112,8 +112,8 @@
			void backward_maxpool_layer(const maxpool_layer l, network_state state)
			{
			int i;
			int h = (l.h-1)/l.stride + 1;
			int w = (l.w-1)/l.stride + 1;
			int h = l.out_h;
			int w = l.out_w;
			int c = l.c;
			for(i = 0; i < hwc*l.batch; ++i){
			int index = l.indexes[i];

			@@ -9,7 +9,7 @@
			typedef layer maxpool_layer;

			image get_maxpool_image(maxpool_layer l);
			maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride);
			maxpool_layer make_maxpool_layer(int batch, int h, int w, int c, int size, int stride, int padding);
			void resize_maxpool_layer(maxpool_layer *l, int w, int h);
			void forward_maxpool_layer(const maxpool_layer l, network_state state);
			void backward_maxpool_layer(const maxpool_layer l, network_state state);

			@@ -7,10 +7,10 @@
			#include "cuda.h"
			}

			__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float input, float output, int *indexes)
			__global__ void forward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float input, float output, int *indexes)
			{
			int h = (in_h-1)/stride + 1;
			int w = (in_w-1)/stride + 1;
			int h = (in_h + 2*pad - size + 1)/stride + 1;
			int w = (in_w + 2*pad - size + 1)/stride + 1;
			int c = in_c;

			int id = (blockIdx.x + blockIdx.ygridDim.x) blockDim.x + threadIdx.x;
			@@ -24,8 +24,8 @@
			id /= c;
			int b = id;

			int w_offset = (-size-1)/2 + 1;
			int h_offset = (-size-1)/2 + 1;
			int w_offset = -pad;
			int h_offset = -pad;

			int out_index = j + w(i + h(k + c*b));
			float max = -INFINITY;
			@@ -47,10 +47,10 @@
			indexes[out_index] = max_i;
			}

			__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, float delta, float prev_delta, int *indexes)
			__global__ void backward_maxpool_layer_kernel(int n, int in_h, int in_w, int in_c, int stride, int size, int pad, float delta, float prev_delta, int *indexes)
			{
			int h = (in_h-1)/stride + 1;
			int w = (in_w-1)/stride + 1;
			int h = (in_h + 2*pad - size + 1)/stride + 1;
			int w = (in_w + 2*pad - size + 1)/stride + 1;
			int c = in_c;
			int area = (size-1)/stride;

			@@ -66,8 +66,8 @@
			id /= in_c;
			int b = id;

			int w_offset = (-size-1)/2 + 1;
			int h_offset = (-size-1)/2 + 1;
			int w_offset = -pad;
			int h_offset = -pad;

			float d = 0;
			int l, m;
			@@ -86,13 +86,13 @@

			extern "C" void forward_maxpool_layer_gpu(maxpool_layer layer, network_state state)
			{
			int h = (layer.h-1)/layer.stride + 1;
			int w = (layer.w-1)/layer.stride + 1;
			int h = layer.out_h;
			int w = layer.out_w;
			int c = layer.c;

			size_t n = hwc*layer.batch;

			forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, state.input, layer.output_gpu, layer.indexes_gpu);
			forward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, state.input, layer.output_gpu, layer.indexes_gpu);
			check_error(cudaPeekAtLastError());
			}

			@@ -100,7 +100,7 @@
			{
			size_t n = layer.hlayer.wlayer.c*layer.batch;

			backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.delta_gpu, state.delta, layer.indexes_gpu);
			backward_maxpool_layer_kernel<<<cuda_gridsize(n), BLOCK>>>(n, layer.h, layer.w, layer.c, layer.stride, layer.size, layer.pad, layer.delta_gpu, state.delta, layer.indexes_gpu);
			check_error(cudaPeekAtLastError());
			}