~speedprog/mtg/mtg_card_detector.git

parent: 89292351 | patch | commit | show whitespace

Joseph Redmon

2015-12-14 db0397cfaaf488364e3d2e1669dfefae2ee6ea73

shortcut layers, msr networks

30 files modified

5 files added

	Makefile	2 ●●●●● patch \| view \| raw \| blame \| history
	cfg/msr_152.cfg	1408 ●●●●● patch \| view \| raw \| blame \| history
	cfg/msr_34.cfg	371 ●●●●● patch \| view \| raw \| blame \| history
	cfg/msr_50.cfg	490 ●●●●● patch \| view \| raw \| blame \| history
	src/avgpool_layer.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/blas.c	16 ●●●●● patch \| view \| raw \| blame \| history
	src/blas.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/blas_kernels.cu	25 ●●●●● patch \| view \| raw \| blame \| history
	src/classifier.c	7 ●●●●● patch \| view \| raw \| blame \| history
	src/connected_layer.c	32 ●●●●● patch \| view \| raw \| blame \| history
	src/connected_layer.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/convolutional_layer.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/cost_layer.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/crop_layer.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/cuda.c	1 ●●●●● patch \| view \| raw \| blame \| history
	src/data.c	92 ●●●●● patch \| view \| raw \| blame \| history
	src/data.h	3 ●●●●● patch \| view \| raw \| blame \| history
	src/deconvolutional_layer.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/detection_layer.c	2 ●●●●● patch \| view \| raw \| blame \| history
	src/detection_layer.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/dropout_layer.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/image.c	7 ●●●●● patch \| view \| raw \| blame \| history
	src/layer.h	12 ●●●●● patch \| view \| raw \| blame \| history
	src/local_layer.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/maxpool_layer.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/network.c	15 ●●●●● patch \| view \| raw \| blame \| history
	src/network.h	13 ●●●●● patch \| view \| raw \| blame \| history
	src/network_kernels.cu	11 ●●●●● patch \| view \| raw \| blame \| history
	src/normalization_layer.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/params.h	11 ●●●●● patch \| view \| raw \| blame \| history
	src/parser.c	66 ●●●●● patch \| view \| raw \| blame \| history
	src/shortcut_layer.c	64 ●●●●● patch \| view \| raw \| blame \| history
	src/shortcut_layer.h	16 ●●●●● patch \| view \| raw \| blame \| history
	src/softmax_layer.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/yolo.c	5 ●●●●● patch \| view \| raw \| blame \| history

 Makefile

@@ -34,7 +34,7 @@
LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
endif

OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o
OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o
ifeq ($(GPU), 1) 
OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o yolo_kernels.o coco_kernels.o
endif

 cfg/msr_152.cfg

New file
@@ -0,0 +1,1408 @@
[net]
batch=256
subdivisions=16
height=256
width=256
channels=3
momentum=0.9
decay=0.0005

learning_rate=0.02

[crop]
crop_height=224
crop_width=224
flip=1
saturation=1
exposure=1
angle=0

##### Conv 1 #####

[convolutional]
batch_normalize=1
filters=64
size=7
stride=2
pad=1
activation=leaky

[maxpool]
size=3
stride=2


##### Conv 2_x #####


[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4


##### Conv 3_x #####

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4


##### Conv 4_x #####

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4


##### Conv 5_x #####

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=2048
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=2048
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=2048
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[avgpool]

[connected]
output=1000
activation=leaky

[softmax]
groups=1

[cost]
type=sse


 cfg/msr_34.cfg

New file
@@ -0,0 +1,371 @@
[net]
batch=128
subdivisions=1
height=256
width=256
channels=3
momentum=0.9
decay=0.0005

learning_rate=0.1
policy=poly
power=4
max_batches=500000

#policy=sigmoid
#gamma=.00008
#step=100000
#max_batches=200000

[crop]
crop_height=224
crop_width=224
flip=1
saturation=1
exposure=1
angle=0

[convolutional]
batch_normalize=1
filters=64
size=7
stride=2
pad=1
activation=leaky

[maxpool]
size=3
stride=2

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3




[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3






[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3









[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[shortcut]
from = -3

[avgpool]

[connected]
output=1000
activation=leaky

[softmax]
groups=1

[cost]
type=sse


 cfg/msr_50.cfg

New file
@@ -0,0 +1,490 @@
[net]
batch=128
subdivisions=4
height=256
width=256
channels=3
momentum=0.9
decay=0.0005

learning_rate=0.01

[crop]
crop_height=224
crop_width=224
flip=1
saturation=1
exposure=1
angle=0

##### Conv 1 #####

[convolutional]
batch_normalize=1
filters=64
size=7
stride=2
pad=1
activation=leaky

[maxpool]
size=3
stride=2


##### Conv 2_x #####


[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4


##### Conv 3_x #####

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4


##### Conv 4_x #####

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=1024
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4


##### Conv 5_x #####

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=2048
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=2048
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky

[convolutional]
batch_normalize=1
filters=2048
size=1
stride=1
pad=1
activation=leaky

[shortcut]
from = -4

[avgpool]

[connected]
output=1000
activation=leaky

[softmax]
groups=1

[cost]
type=sse


 src/avgpool_layer.h

@@ -2,9 +2,9 @@
#define AVGPOOL_LAYER_H

#include "image.h"
#include "params.h"
#include "cuda.h"
#include "layer.h"
#include "network.h"

typedef layer avgpool_layer;


 src/blas.c

@@ -1,6 +1,22 @@
#include "blas.h"
#include "math.h"

/*
 * Add selected elements of `add` into `out`, in place.
 *
 * out     destination buffer, indexed as [batch][c][h][w]; modified in place
 * w, h, c width, height and channel count used to index `out`
 * batch   number of batch items processed
 * sample  step between visited positions in `out` (rows and columns);
 *         h/sample rows and w/sample columns are visited per channel
 * add     source buffer; rows are w*stride/sample apart and channels are
 *         h*stride/sample rows apart
 * stride  step between visited positions in `add` (rows and columns)
 * c2      channel count used for the batch offset into `add`
 *
 * Only the first min(c, c2) channels are touched.
 */
void shortcut_cpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2)
{
    int n, ch, row, col;
    int rows, cols, add_w, add_h;
    int channels = (c < c2) ? c : c2;

    /* Nothing is visited when any loop extent is empty. */
    if(batch <= 0 || channels <= 0) return;

    rows = h/sample;
    cols = w/sample;
    if(rows <= 0 || cols <= 0) return;

    /* Row pitch and channel height used to index `add`. */
    add_w = w*stride/sample;
    add_h = h*stride/sample;

    for(n = 0; n < batch; ++n){
        /* Kept as one left-to-right integer expression on purpose:
           regrouping it would change truncation for non-divisible sizes. */
        int add_batch = n*w*stride/sample*h*stride/sample*c2;
        for(ch = 0; ch < channels; ++ch){
            int out_plane = h*(ch + c*n);
            int add_plane = add_h*ch;
            for(row = 0; row < rows; ++row){
                float *dst = out + w*(row*sample + out_plane);
                float *src = add + add_batch + add_w*(row*stride + add_plane);
                for(col = 0; col < cols; ++col){
                    dst[col*sample] += src[col*stride];
                }
            }
        }
    }
}

void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
{
    float scale = 1./(batch * spatial);

 src/blas.h

@@ -15,6 +15,7 @@
void scal_cpu(int N, float ALPHA, float *X, int INCX);
float dot_cpu(int N, float *X, int INCX, float *Y, int INCY);
void test_gpu_blas();
void shortcut_cpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2);

void mean_cpu(float *x, int batch, int filters, int spatial, float *mean);
void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
@@ -43,5 +44,6 @@

void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
void shortcut_gpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2);
#endif
#endif

 src/blas_kernels.cu

@@ -228,6 +228,7 @@
    if(i < N) Y[i*INCY] *= X[i*INCX];
}


extern "C" void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial)
{
    size_t N = batch*filters*spatial;
@@ -372,3 +373,27 @@
    fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
    check_error(cudaPeekAtLastError());
}

/*
 * Device kernel: each thread adds one element of `add` into `out`.
 *
 * The flat thread id is unpacked, fastest-varying first, into
 *   column i in [0, w/sample), row j in [0, h/sample),
 *   channel k in [0, min_c), and batch index b (whatever remains).
 * Threads whose id is >= size do nothing.
 *
 * `out` is indexed as [b][c][h][w] with a step of `sample` in rows and
 * columns; `add` is indexed with a step of `stride`, a row pitch of
 * w*stride/sample and a channel height of h*stride/sample.
 */
__global__ void shortcut_kernel(int size, float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2, int min_c)
{
    const int tid = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (tid >= size) return;

    const int cols = w/sample;
    const int rows = h/sample;

    // Peel coordinates off the flat id one dimension at a time.
    const int i = tid % cols;
    int rest = tid / cols;
    const int j = rest % rows;
    rest /= rows;
    const int k = rest % min_c;
    const int b = rest / min_c;

    const int add_w = w*stride/sample;
    const int add_h = h*stride/sample;

    const int out_index = i*sample + w*(j*sample + h*(k + c*b));
    // Batch offset kept as a single left-to-right integer expression so
    // truncation matches the original exactly.
    const int add_index = b*w*stride/sample*h*stride/sample*c2 + i*stride + add_w*(j*stride + add_h*k);
    out[out_index] += add[add_index];
}

/*
 * Host-side launcher for shortcut_kernel: adds elements of `add` into `out`
 * on the device, over the first min(c, c2) channels.
 *
 * out, add      device buffers passed straight through to the kernel
 * w, h, c       dimensions used to index `out`
 * batch         number of batch items
 * sample/stride steps used when indexing `out` / `add` respectively
 * c2            channel count used when indexing `add`
 *
 * One thread is launched per visited output element. The kernel decodes its
 * flat id using w/sample columns and h/sample rows, so the element count is
 * computed with those same truncated quotients. Evaluating
 * batch*w/sample*h/sample left to right can yield a larger count when w or h
 * is not a multiple of `sample`, and the surplus threads would decode a batch
 * index >= batch and access memory out of bounds.
 */
extern "C" void shortcut_gpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2)
{
    int min_c = (c < c2) ? c : c2;
    int size = batch * (w/sample) * (h/sample) * min_c;
    // Nothing to add; skip the launch instead of requesting an empty grid.
    if (size <= 0) return;
    shortcut_kernel<<<cuda_gridsize(size), BLOCK>>>(size, out, w, h, c, batch, sample, add, stride, c2, min_c);
    check_error(cudaPeekAtLastError());
}

 src/classifier.c

@@ -131,7 +131,7 @@
    char *label_list = option_find_str(options, "labels", "data/labels.list");
    char *valid_list = option_find_str(options, "valid", "data/train.list");
    int classes = option_find_int(options, "classes", 2);
    int topk = option_find_int(options, "topk", 1);
    int topk = option_find_int(options, "top", 1);

    char **labels = get_labels(label_list);
    list *plist = get_paths(valid_list);
@@ -194,11 +194,12 @@

    list *options = read_data_cfg(datacfg);

    char *label_list = option_find_str(options, "labels", "data/labels.list");
    char *name_list = option_find_str(options, "names", 0);
    if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
    int top = option_find_int(options, "top", 1);

    int i = 0;
    char **names = get_labels(label_list);
    char **names = get_labels(name_list);
    clock_t time;
    int indexes[10];
    char buff[256];

 src/connected_layer.c

@@ -25,13 +25,13 @@
    l.weight_updates = calloc(inputs*outputs, sizeof(float));
    l.bias_updates = calloc(outputs, sizeof(float));

    l.weights = calloc(inputs*outputs, sizeof(float));
    l.weights = calloc(outputs*inputs, sizeof(float));
    l.biases = calloc(outputs, sizeof(float));


    //float scale = 1./sqrt(inputs);
    float scale = sqrt(2./inputs);
    for(i = 0; i < inputs*outputs; ++i){
    for(i = 0; i < outputs*inputs; ++i){
        l.weights[i] = 2*scale*rand_uniform() - scale;
    }

@@ -40,10 +40,10 @@
    }

#ifdef GPU
    l.weights_gpu = cuda_make_array(l.weights, inputs*outputs);
    l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
    l.biases_gpu = cuda_make_array(l.biases, outputs);

    l.weight_updates_gpu = cuda_make_array(l.weight_updates, inputs*outputs);
    l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs);
    l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs);

    l.output_gpu = cuda_make_array(l.output, outputs*batch);
@@ -76,7 +76,7 @@
    float *a = state.input;
    float *b = l.weights;
    float *c = l.output;
    gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
    gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
    activate_array(l.output, l.outputs*l.batch, l.activation);
}

@@ -87,11 +87,11 @@
    for(i = 0; i < l.batch; ++i){
        axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
    }
    int m = l.inputs;
    int m = l.outputs;
    int k = l.batch;
    int n = l.outputs;
    float *a = state.input;
    float *b = l.delta;
    int n = l.inputs;
    float *a = l.delta;
    float *b = state.input;
    float *c = l.weight_updates;
    gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

@@ -103,7 +103,7 @@
    b = l.weights;
    c = state.delta;

    if(c) gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
    if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
}

#ifdef GPU
@@ -146,7 +146,7 @@
    float * a = state.input;
    float * b = l.weights_gpu;
    float * c = l.output_gpu;
    gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
    gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
    activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);

/*
@@ -163,11 +163,11 @@
    for(i = 0; i < l.batch; ++i){
        axpy_ongpu_offset(l.outputs, 1, l.delta_gpu, i*l.outputs, 1, l.bias_updates_gpu, 0, 1);
    }
    int m = l.inputs;
    int m = l.outputs;
    int k = l.batch;
    int n = l.outputs;
    float * a = state.input;
    float * b = l.delta_gpu;
    int n = l.inputs;
    float * a = l.delta_gpu;
    float * b = state.input;
    float * c = l.weight_updates_gpu;
    gemm_ongpu(1,0,m,n,k,1,a,m,b,n,1,c,n);

@@ -179,6 +179,6 @@
    b = l.weights_gpu;
    c = state.delta;

    if(c) gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
    if(c) gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
}
#endif

 src/connected_layer.h

@@ -2,8 +2,8 @@
#define CONNECTED_LAYER_H

#include "activations.h"
#include "params.h"
#include "layer.h"
#include "network.h"

typedef layer connected_layer;


 src/convolutional_layer.h

@@ -6,6 +6,7 @@
#include "image.h"
#include "activations.h"
#include "layer.h"
#include "network.h"

typedef layer convolutional_layer;


 src/cost_layer.h

@@ -1,7 +1,7 @@
#ifndef COST_LAYER_H
#define COST_LAYER_H
#include "params.h"
#include "layer.h"
#include "network.h"

typedef layer cost_layer;


 src/crop_layer.h

@@ -4,6 +4,7 @@
#include "image.h"
#include "params.h"
#include "layer.h"
#include "network.h"

typedef layer crop_layer;


 src/cuda.c

@@ -67,6 +67,7 @@
        status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
        check_error(status);
    }
    if(!x_gpu) error("Cuda malloc failed\n");
    return x_gpu;
}


 src/data.c

@@ -149,6 +149,43 @@
    }
}

void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy)
{
    char *labelpath = find_replace(path, "images", "labels");
    labelpath = find_replace(labelpath, "JPEGImages", "labels");

    labelpath = find_replace(labelpath, ".jpg", ".txt");
    labelpath = find_replace(labelpath, ".JPG", ".txt");
    labelpath = find_replace(labelpath, ".JPEG", ".txt");
    int count = 0;
    box_label *boxes = read_boxes(labelpath, &count);
    randomize_boxes(boxes, count);
    correct_boxes(boxes, count, dx, dy, sx, sy, flip);
    float x,y,w,h;
    int id;
    int i;

    for (i = 0; i < count && i < 30; ++i) {
        x =  boxes[i].x;
        y =  boxes[i].y;
        w =  boxes[i].w;
        h =  boxes[i].h;
        id = boxes[i].id;

        if (w < .0 || h < .0) continue;

        int index = (4+classes) * i;

        truth[index++] = x;
        truth[index++] = y;
        truth[index++] = w;
        truth[index++] = h;

        if (id < classes) truth[index+id] = 1;
    }
    free(boxes);
}

void fill_truth_region(char *path, float *truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy)
{
    char *labelpath = find_replace(path, "images", "labels");
@@ -482,6 +519,59 @@
    return d;
}

data load_data_swag(char **paths, int n, int classes, float jitter)
{
    int index = rand_r(&data_seed)%n;
    char *random_path = paths[index];
    
    image orig = load_image_color(random_path, 0, 0);
    int h = orig.h;
    int w = orig.w;

    data d;
    d.shallow = 0;
    d.w = w;
    d.h = h;

    d.X.rows = 1;
    d.X.vals = calloc(d.X.rows, sizeof(float*));
    d.X.cols = h*w*3;

    int k = (4+classes)*30;
    d.y = make_matrix(1, k);

    int dw = w*jitter;
    int dh = h*jitter;

    int pleft  = (rand_uniform() * 2*dw - dw);
    int pright = (rand_uniform() * 2*dw - dw);
    int ptop   = (rand_uniform() * 2*dh - dh);
    int pbot   = (rand_uniform() * 2*dh - dh);

    int swidth =  w - pleft - pright;
    int sheight = h - ptop - pbot;

    float sx = (float)swidth  / w;
    float sy = (float)sheight / h;

    int flip = rand_r(&data_seed)%2;
    image cropped = crop_image(orig, pleft, ptop, swidth, sheight);

    float dx = ((float)pleft/w)/sx;
    float dy = ((float)ptop /h)/sy;

    image sized = resize_image(cropped, w, h);
    if(flip) flip_image(sized);
    d.X.vals[0] = sized.data;

    fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy);

    free_image(orig);
    free_image(cropped);

    return d;
}

data load_data_detection(int n, char **paths, int m, int classes, int w, int h, int num_boxes, int background)
{
    char **random_paths = get_random_paths(paths, n, m);
@@ -559,6 +649,8 @@
        *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
    } else if (a.type == REGION_DATA){
        *a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
    } else if (a.type == SWAG_DATA){
        *a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
    } else if (a.type == COMPARE_DATA){
        *a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h);
    } else if (a.type == IMAGE_DATA){

 src/data.h

@@ -20,13 +20,14 @@
}

/* A set of samples: inputs in X with the matching targets in y. */
typedef struct{
    int w, h;       /* image width/height; load_data_swag stores the source image size here */
    matrix X;       /* input rows (load_data_swag stores one image's pixel buffer per row) */
    matrix y;       /* target rows, one per row of X */
    int shallow;    /* NOTE(review): presumably marks rows borrowed from another data object - confirm */
} data;

typedef enum {
    CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA
    CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA
} data_type;

typedef struct load_args{

 src/deconvolutional_layer.h

@@ -6,6 +6,7 @@
#include "image.h"
#include "activations.h"
#include "layer.h"
#include "network.h"

typedef layer deconvolutional_layer;


 src/detection_layer.c

@@ -146,7 +146,7 @@
                }
                float iou  = box_iou(out, truth);

                //printf("%d", best_index);
                //printf("%d,", best_index);
                int p_index = index + locations*l.classes + i*l.n + best_index;
                *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2);
                *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2);

 src/detection_layer.h

@@ -1,8 +1,8 @@
#ifndef REGION_LAYER_H
#define REGION_LAYER_H

#include "params.h"
#include "layer.h"
#include "network.h"

typedef layer detection_layer;


 src/dropout_layer.h

@@ -3,6 +3,7 @@

#include "params.h"
#include "layer.h"
#include "network.h"

typedef layer dropout_layer;


 src/image.c

@@ -1,5 +1,6 @@
#include "image.h"
#include "utils.h"
#include "blas.h"
#include <stdio.h>
#include <math.h>

@@ -708,8 +709,14 @@
    image exp5 = copy_image(im);
    exposure_image(exp5, .5);

    image r = resize_image(im, im.w/2, im.h/2);

    image black = make_image(im.w, im.h, im.c);
    shortcut_cpu(black.data, im.w, im.h, im.c, 1, 2, r.data, 1, r.c);

    show_image(im, "Original");
    show_image(gray, "Gray");
    show_image(black, "Black");
    show_image(sat2, "Saturation-2");
    show_image(sat5, "Saturation-.5");
    show_image(exp2, "Exposure-2");

 src/layer.h

@@ -3,6 +3,9 @@

#include "activations.h"

struct layer;
typedef struct layer layer;

typedef enum {
    CONVOLUTIONAL,
    DECONVOLUTIONAL,
@@ -16,20 +19,22 @@
    COST,
    NORMALIZATION,
    AVGPOOL,
    LOCAL
    LOCAL,
    SHORTCUT
} LAYER_TYPE;

typedef enum{
    SSE, MASKED
} COST_TYPE;

typedef struct {
struct layer{
    LAYER_TYPE type;
    ACTIVATION activation;
    COST_TYPE cost_type;
    int batch_normalize;
    int batch;
    int forced;
    int flipped;
    int inputs;
    int outputs;
    int truths;
@@ -45,6 +50,7 @@
    int crop_height;
    int sqrt;
    int flip;
    int index;
    float angle;
    float jitter;
    float saturation;
@@ -144,7 +150,7 @@
    float * squared_gpu;
    float * norms_gpu;
    #endif
} layer;
};

void free_layer(layer);


 src/local_layer.h

@@ -2,10 +2,10 @@
#define LOCAL_LAYER_H

#include "cuda.h"
#include "params.h"
#include "image.h"
#include "activations.h"
#include "layer.h"
#include "network.h"

typedef layer local_layer;


 src/maxpool_layer.h

@@ -5,6 +5,7 @@
#include "params.h"
#include "cuda.h"
#include "layer.h"
#include "network.h"

typedef layer maxpool_layer;


 src/network.c

@@ -19,6 +19,7 @@
#include "softmax_layer.h"
#include "dropout_layer.h"
#include "route_layer.h"
#include "shortcut_layer.h"

int get_current_batch(network net)
{
@@ -94,6 +95,8 @@
            return "cost";
        case ROUTE:
            return "route";
        case SHORTCUT:
            return "shortcut";
        case NORMALIZATION:
            return "normalization";
        default:
@@ -119,6 +122,7 @@
{
    int i;
    for(i = 0; i < net.n; ++i){
        state.index = i;
        layer l = net.layers[i];
        if(l.delta){
            scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
@@ -149,6 +153,8 @@
            forward_dropout_layer(l, state);
        } else if(l.type == ROUTE){
            forward_route_layer(l, net);
        } else if(l.type == SHORTCUT){
            forward_shortcut_layer(l, state);
        }
        state.input = l.output;
    }
@@ -211,6 +217,7 @@
    float *original_input = state.input;
    float *original_delta = state.delta;
    for(i = net.n-1; i >= 0; --i){
        state.index = i;
        if(i == 0){
            state.input = original_input;
            state.delta = original_delta;
@@ -244,6 +251,8 @@
            backward_cost_layer(l, state);
        } else if(l.type == ROUTE){
            backward_route_layer(l, net);
        } else if(l.type == SHORTCUT){
            backward_shortcut_layer(l, state);
        }
    }
}
@@ -255,6 +264,8 @@
    if(gpu_index >= 0) return train_network_datum_gpu(net, x, y);
#endif
    network_state state;
    state.index = 0;
    state.net = net;
    state.input = x;
    state.delta = 0;
    state.truth = y;
@@ -307,6 +318,8 @@
{
    int i,j;
    network_state state;
    state.index = 0;
    state.net = net;
    state.train = 1;
    state.delta = 0;
    float sum = 0;
@@ -443,6 +456,8 @@
#endif

    network_state state;
    state.net = net;
    state.index = 0;
    state.input = input;
    state.truth = 0;
    state.train = 0;

 src/network.h

@@ -3,15 +3,15 @@
#define NETWORK_H

#include "image.h"
#include "detection_layer.h"
#include "layer.h"
#include "data.h"
#include "params.h"

typedef enum {
    CONSTANT, STEP, EXP, POLY, STEPS, SIG
} learning_rate_policy;

typedef struct {
typedef struct network{
    int n;
    int batch;
    int *seen;
@@ -43,6 +43,15 @@
    #endif
} network;

/*
 * Working state passed to each layer's forward/backward routine while the
 * network loops walk the layers.
 */
typedef struct network_state {
    float *truth;   /* target values for this pass; set to 0 on the predict path */
    float *input;   /* input of the current layer; the forward loop repoints it to each layer's output */
    float *delta;   /* buffer a layer's backward step writes into; may be 0, so layers check it first */
    int train;      /* 1 on the training path, 0 on the predict path */
    int index;      /* index of the layer currently being processed */
    network net;    /* the network being run, letting a layer reach other layers by index */
} network_state;

#ifdef GPU
float train_network_datum_gpu(network net, float *x, float *y);
float *network_predict_gpu(network net, float *input);

 src/network_kernels.cu

@@ -27,6 +27,7 @@
#include "softmax_layer.h"
#include "dropout_layer.h"
#include "route_layer.h"
#include "shortcut_layer.h"
#include "blas.h"
}

@@ -38,6 +39,7 @@
{
    int i;
    for(i = 0; i < net.n; ++i){
        state.index = i;
        layer l = net.layers[i];
        if(l.delta_gpu){
            fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
@@ -68,6 +70,8 @@
            forward_dropout_layer_gpu(l, state);
        } else if(l.type == ROUTE){
            forward_route_layer_gpu(l, net);
        } else if(l.type == SHORTCUT){
            forward_shortcut_layer_gpu(l, state);
        }
        state.input = l.output_gpu;
    }
@@ -79,6 +83,7 @@
    float * original_input = state.input;
    float * original_delta = state.delta;
    for(i = net.n-1; i >= 0; --i){
        state.index = i;
        layer l = net.layers[i];
        if(i == 0){
            state.input = original_input;
@@ -112,6 +117,8 @@
            backward_cost_layer_gpu(l, state);
        } else if(l.type == ROUTE){
            backward_route_layer_gpu(l, net);
        } else if(l.type == SHORTCUT){
            backward_shortcut_layer_gpu(l, state);
        }
    }
}
@@ -138,6 +145,8 @@
float train_network_datum_gpu(network net, float *x, float *y)
{
    network_state state;
    state.index = 0;
    state.net = net;
    int x_size = get_network_input_size(net)*net.batch;
    int y_size = get_network_output_size(net)*net.batch;
    if(net.layers[net.n-1].type == DETECTION) y_size = net.layers[net.n-1].truths*net.batch;
@@ -178,6 +187,8 @@
{
    int size = get_network_input_size(net) * net.batch;
    network_state state;
    state.index = 0;
    state.net = net;
    state.input = cuda_make_array(input, size);
    state.truth = 0;
    state.train = 0;

 src/normalization_layer.h

@@ -3,7 +3,7 @@

#include "image.h"
#include "layer.h"
#include "params.h"
#include "network.h"

layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa);
void resize_normalization_layer(layer *layer, int h, int w);

 src/params.h

@@ -1,12 +1 @@
#ifndef PARAMS_H
#define PARAMS_H

typedef struct {
    float *truth;
    float *input;
    float *delta;
    int train;
} network_state;

#endif


 src/parser.c

@@ -17,6 +17,7 @@
#include "avgpool_layer.h"
#include "local_layer.h"
#include "route_layer.h"
#include "shortcut_layer.h"
#include "list.h"
#include "option_list.h"
#include "utils.h"
@@ -37,6 +38,7 @@
int is_softmax(section *s);
int is_normalization(section *s);
int is_crop(section *s);
int is_shortcut(section *s);
int is_cost(section *s);
int is_detection(section *s);
int is_route(section *s);
@@ -80,6 +82,7 @@
    int h;
    int w;
    int c;
    int index;
} size_params;

deconvolutional_layer parse_deconvolutional(list *options, size_params params)
@@ -148,6 +151,7 @@
    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

    convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation, batch_normalize);
    layer.flipped = option_find_int_quiet(options, "flipped", 0);

    char *weights = option_find_str(options, "weights", 0);
    char *biases = option_find_str(options, "biases", 0);
@@ -287,6 +291,20 @@
    return l;
}

/*
 * Build a shortcut layer from a [shortcut] config section.
 *
 * The "from" option names the source layer.  A negative value is relative to
 * this layer's own position (params.index), so -4 means four layers back.
 * The source must be a layer that has already been parsed; anything else is
 * reported on stderr and the program exits, instead of reading an
 * uninitialised or out-of-range entry of net.layers.
 */
layer parse_shortcut(list *options, size_params params, network net)
{
    char *from_str = option_find(options, "from");
    if(!from_str){
        /* Guards atoi() in case option_find yields NULL for a missing key. */
        fprintf(stderr, "Shortcut layer %d must specify a 'from' layer\n", params.index);
        exit(1);
    }

    int index = atoi(from_str);
    if(index < 0) index = params.index + index;

    /* Only layers before this one exist in net.layers at this point. */
    if(index < 0 || index >= params.index){
        fprintf(stderr, "Shortcut layer %d: 'from' resolves to invalid layer %d\n", params.index, index);
        exit(1);
    }

    int batch = params.batch;
    layer from = net.layers[index];

    layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);
    return s;
}


route_layer parse_route(list *options, size_params params, network net)
{
    char *l = option_find(options, "layers");   
@@ -303,6 +321,7 @@
    for(i = 0; i < n; ++i){
        int index = atoi(l);
        l = strchr(l, ',')+1;
        if(index < 0) index = params.index + index;
        layers[i] = index;
        sizes[i] = net.layers[index].outputs;
    }
@@ -419,6 +438,7 @@
    int count = 0;
    free_section(s);
    while(n){
        params.index = count;
        fprintf(stderr, "%d: ", count);
        s = (section *)n->val;
        options = s->options;
@@ -447,6 +467,8 @@
            l = parse_avgpool(options, params);
        }else if(is_route(s)){
            l = parse_route(options, params, net);
        }else if(is_shortcut(s)){
            l = parse_shortcut(options, params, net);
        }else if(is_dropout(s)){
            l = parse_dropout(options, params);
            l.output = net.layers[count-1].output;
@@ -464,13 +486,13 @@
        net.layers[count] = l;
        free_section(s);
        n = n->next;
        ++count;
        if(n){
            params.h = l.out_h;
            params.w = l.out_w;
            params.c = l.out_c;
            params.inputs = l.outputs;
        }
        ++count;
    }   
    free_list(sections);
    net.outputs = get_network_output_size(net);
@@ -478,6 +500,10 @@
    return net;
}

/* Returns 1 when the section's type string is exactly "[shortcut]", else 0. */
int is_shortcut(section *s)
{
    if(strcmp(s->type, "[shortcut]") != 0) return 0;
    return 1;
}
int is_crop(section *s)
{
    return (strcmp(s->type, "[crop]")==0);
@@ -625,9 +651,12 @@
    FILE *fp = fopen(filename, "w");
    if(!fp) file_error(filename);

    fwrite(&net.learning_rate, sizeof(float), 1, fp);
    fwrite(&net.momentum, sizeof(float), 1, fp);
    fwrite(&net.decay, sizeof(float), 1, fp);
    int major = 0;
    int minor = 1;
    int revision = 0;
    fwrite(&major, sizeof(int), 1, fp);
    fwrite(&minor, sizeof(int), 1, fp);
    fwrite(&revision, sizeof(int), 1, fp);
    fwrite(net.seen, sizeof(int), 1, fp);

    int i;
@@ -674,6 +703,19 @@
    save_weights_upto(net, filename, net.n);
}

/*
 * Transpose, in place, the row-major rows x cols matrix stored in `a`.
 * On return `a` holds the cols x rows transpose, still row-major.
 *
 * A NULL pointer or a non-positive dimension is a no-op.  The scratch buffer
 * is sized in size_t so rows*cols cannot overflow int, and a failed
 * allocation is reported and aborts instead of dereferencing NULL.
 */
void transpose_matrix(float *a, int rows, int cols)
{
    if(!a || rows <= 0 || cols <= 0) return;

    size_t n_rows = (size_t)rows;
    size_t n_cols = (size_t)cols;
    size_t count = n_rows * n_cols;

    /* Every element is overwritten below, so zero-filling is unnecessary. */
    float *transpose = malloc(count * sizeof(float));
    if(!transpose){
        fprintf(stderr, "transpose_matrix: out of memory\n");
        exit(1);
    }

    size_t x, y;
    for(x = 0; x < n_rows; ++x){
        for(y = 0; y < n_cols; ++y){
            transpose[y*n_rows + x] = a[x*n_cols + y];
        }
    }
    memcpy(a, transpose, count*sizeof(float));
    free(transpose);
}

void load_weights_upto(network *net, char *filename, int cutoff)
{
    fprintf(stderr, "Loading weights from %s...", filename);
@@ -681,10 +723,12 @@
    FILE *fp = fopen(filename, "r");
    if(!fp) file_error(filename);

    float garbage;
    fread(&garbage, sizeof(float), 1, fp);
    fread(&garbage, sizeof(float), 1, fp);
    fread(&garbage, sizeof(float), 1, fp);
    int major;
    int minor;
    int revision;
    fread(&major, sizeof(int), 1, fp);
    fread(&minor, sizeof(int), 1, fp);
    fread(&revision, sizeof(int), 1, fp);
    fread(net->seen, sizeof(int), 1, fp);

    int i;
@@ -700,6 +744,9 @@
                fread(l.rolling_variance, sizeof(float), l.n, fp);
            }
            fread(l.filters, sizeof(float), num, fp);
            if (l.flipped) {
                transpose_matrix(l.filters, l.c*l.size*l.size, l.n);
            }
#ifdef GPU
            if(gpu_index >= 0){
                push_convolutional_layer(l);
@@ -719,6 +766,9 @@
        if(l.type == CONNECTED){
            fread(l.biases, sizeof(float), l.outputs, fp);
            fread(l.weights, sizeof(float), l.outputs*l.inputs, fp);
            if(major > 1000 || minor > 1000){
                transpose_matrix(l.weights, l.inputs, l.outputs);
            }
#ifdef GPU
            if(gpu_index >= 0){
                push_connected_layer(l);

 src/shortcut_layer.c

New file
@@ -0,0 +1,64 @@
#include "shortcut_layer.h"
#include "cuda.h"
#include "blas.h"
#include <stdio.h>
#include <assert.h>

layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2)
{
    fprintf(stderr,"Shortcut Layer: %d\n", index);
    layer l = {0};
    l.type = SHORTCUT;
    l.batch = batch;
    l.w = w;
    l.h = h;
    l.c = c;
    l.out_w = w;
    l.out_h = h;
    l.out_c = c;
    l.outputs = w*h*c;
    l.inputs = w*h*c;
    int stride = w2 / w;

    assert(stride * w == w2);
    assert(stride * h == h2);
    assert(c >= c2);

    l.stride = stride;
    l.n = c2;
    l.index = index;

    l.delta =  calloc(l.outputs*batch, sizeof(float));
    l.output = calloc(l.outputs*batch, sizeof(float));;
    #ifdef GPU
    l.delta_gpu =  cuda_make_array(l.delta, l.outputs*batch);
    l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
    #endif
    return l;
}

/*
 * Forward pass: copy the incoming activations into l.output, then let
 * shortcut_cpu() combine them with the output of the source layer
 * (layer l.index of the running network).
 */
void forward_shortcut_layer(const layer l, network_state state)
{
    int count = l.outputs*l.batch;
    float *from_output = state.net.layers[l.index].output;

    copy_cpu(count, state.input, 1, l.output, 1);
    shortcut_cpu(l.output, l.w, l.h, l.c, l.batch, 1, from_output, l.stride, l.n);
}

/*
 * Backward pass: send this layer's delta to both of its inputs.
 *
 * 1. The layer directly before it (state.delta).  The delta is added, not
 *    copied: the backward loop runs from the last layer to the first, so a
 *    later shortcut or route that also reads that layer may already have put
 *    gradient into the same buffer, and a plain copy would throw it away.
 *    Layer deltas are zeroed at the start of the forward pass, so adding gives
 *    the same result as copying when nothing else has contributed.
 * 2. The source layer (layer l.index), through shortcut_cpu() with the
 *    dimensions scaled by l.stride.
 */
void backward_shortcut_layer(const layer l, network_state state)
{
    axpy_cpu(l.outputs*l.batch, 1, l.delta, 1, state.delta, 1);
    shortcut_cpu(state.net.layers[l.index].delta, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta, 1, l.c);
}

#ifdef GPU
/*
 * GPU forward pass: copy the incoming activations into l.output_gpu, then let
 * shortcut_gpu() combine them with the device output of the source layer
 * (layer l.index of the running network).
 */
void forward_shortcut_layer_gpu(const layer l, network_state state)
{
    int count = l.outputs*l.batch;
    float *from_output_gpu = state.net.layers[l.index].output_gpu;

    copy_ongpu(count, state.input, 1, l.output_gpu, 1);
    shortcut_gpu(l.output_gpu, l.w, l.h, l.c, l.batch, 1, from_output_gpu, l.stride, l.n);
}

/*
 * GPU backward pass: send this layer's delta to both of its inputs.
 *
 * 1. The layer directly before it (state.delta).  The delta is added, not
 *    copied: the backward loop runs from the last layer to the first, so a
 *    later shortcut or route that also reads that layer may already have put
 *    gradient into the same buffer, and a plain copy would throw it away.
 *    Layer deltas are zeroed at the start of the forward pass, so adding gives
 *    the same result as copying when nothing else has contributed.
 * 2. The source layer (layer l.index), through shortcut_gpu() with the
 *    dimensions scaled by l.stride.
 */
void backward_shortcut_layer_gpu(const layer l, network_state state)
{
    /* Arguments: N, ALPHA, X, OFFX, INCX, Y, OFFY, INCY */
    axpy_ongpu_offset(l.outputs*l.batch, 1, l.delta_gpu, 0, 1, state.delta, 0, 1);
    shortcut_gpu(state.net.layers[l.index].delta_gpu, l.w*l.stride, l.h*l.stride, l.n, l.batch, l.stride, l.delta_gpu, 1, l.c);
}
#endif

 src/shortcut_layer.h

New file
@@ -0,0 +1,16 @@
/* Public interface of the shortcut layer. */
#ifndef SHORTCUT_LAYER_H
#define SHORTCUT_LAYER_H

#include "layer.h"
#include "network.h"

/*
 * Create a shortcut layer.  `index` is the source layer's position in the
 * network; w, h, c describe this layer's input and w2, h2, c2 describe the
 * source layer's output.
 */
layer make_shortcut_layer(int batch, int index, int w, int h, int c, int w2, int h2, int c2);

/* CPU forward/backward passes; the source layer is reached through state.net. */
void forward_shortcut_layer(const layer l, network_state state);
void backward_shortcut_layer(const layer l, network_state state);

#ifdef GPU
/* GPU counterparts, operating on the layers' device buffers. */
void forward_shortcut_layer_gpu(const layer l, network_state state);
void backward_shortcut_layer_gpu(const layer l, network_state state);
#endif

#endif

 src/softmax_layer.h

@@ -2,6 +2,7 @@
#define SOFTMAX_LAYER_H
#include "params.h"
#include "layer.h"
#include "network.h"

typedef layer softmax_layer;


 src/yolo.c

@@ -255,9 +255,8 @@
    int i=0;

    float thresh = .001;
    int nms = 0;
    float iou_thresh = .5;
    float nms_thresh = .5;
    float nms = 0;

    int total = 0;
    int correct = 0;
@@ -271,7 +270,7 @@
        char *id = basecfg(path);
        float *predictions = network_predict(net, sized.data);
        convert_yolo_detections(predictions, classes, l.n, square, side, 1, 1, thresh, probs, boxes, 1);
        if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms_thresh);
        if (nms) do_nms(boxes, probs, side*side*l.n, 1, nms);

        char *labelpath = find_replace(path, "images", "labels");
        labelpath = find_replace(labelpath, "JPEGImages", "labels");

			@@ -34,7 +34,7 @@
			LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
			endif

			OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o
			OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o
			ifeq ($(GPU), 1)
			OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o yolo_kernels.o coco_kernels.o
			endif

New file
			@@ -0,0 +1,1408 @@
			[net]
			batch=256
			subdivisions=16
			height=256
			width=256
			channels=3
			momentum=0.9
			decay=0.0005

			learning_rate=0.02

			[crop]
			crop_height=224
			crop_width=224
			flip=1
			saturation=1
			exposure=1
			angle=0

			##### Conv 1 #####

			[convolutional]
			batch_normalize=1
			filters=64
			size=7
			stride=2
			pad=1
			activation=leaky

			[maxpool]
			size=3
			stride=2


			##### Conv 2_x #####


			[convolutional]
			batch_normalize=1
			filters=64
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=64
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=64
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4


			##### Conv 3_x #####

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=2
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4


			##### Conv 4_x #####

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=2
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4


			##### Conv 5_x #####

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=2
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=2048
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=2048
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=2048
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[avgpool]

			[connected]
			output=1000
			activation=leaky

			[softmax]
			groups=1

			[cost]
			type=sse

New file
			@@ -0,0 +1,371 @@
			[net]
			batch=128
			subdivisions=1
			height=256
			width=256
			channels=3
			momentum=0.9
			decay=0.0005

			learning_rate=0.1
			policy=poly
			power=4
			max_batches=500000

			#policy=sigmoid
			#gamma=.00008
			#step=100000
			#max_batches=200000

			[crop]
			crop_height=224
			crop_width=224
			flip=1
			saturation=1
			exposure=1
			angle=0

			[convolutional]
			batch_normalize=1
			filters=64
			size=7
			stride=2
			pad=1
			activation=leaky

			[maxpool]
			size=3
			stride=2

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3




			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=2
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3






			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=2
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3









			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=2
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -3

			[avgpool]

			[connected]
			output=1000
			activation=leaky

			[softmax]
			groups=1

			[cost]
			type=sse

New file
			@@ -0,0 +1,490 @@
			[net]
			batch=128
			subdivisions=4
			height=256
			width=256
			channels=3
			momentum=0.9
			decay=0.0005

			learning_rate=0.01

			[crop]
			crop_height=224
			crop_width=224
			flip=1
			saturation=1
			exposure=1
			angle=0

			##### Conv 1 #####

			[convolutional]
			batch_normalize=1
			filters=64
			size=7
			stride=2
			pad=1
			activation=leaky

			[maxpool]
			size=3
			stride=2


			##### Conv 2_x #####


			[convolutional]
			batch_normalize=1
			filters=64
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=64
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=64
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=64
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4


			##### Conv 3_x #####

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=2
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=128
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=128
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4


			##### Conv 4_x #####

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=2
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=256
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=256
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=1024
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4


			##### Conv 5_x #####

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=2
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=2048
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=2048
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[convolutional]
			batch_normalize=1
			filters=512
			size=1
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=512
			size=3
			stride=1
			pad=1
			activation=leaky

			[convolutional]
			batch_normalize=1
			filters=2048
			size=1
			stride=1
			pad=1
			activation=leaky

			[shortcut]
			from = -4

			[avgpool]

			[connected]
			output=1000
			activation=leaky

			[softmax]
			groups=1

			[cost]
			type=sse

			@@ -2,9 +2,9 @@
			#define AVGPOOL_LAYER_H

			#include "image.h"
			#include "params.h"
			#include "cuda.h"
			#include "layer.h"
			#include "network.h"

			typedef layer avgpool_layer;

			@@ -1,6 +1,22 @@
			#include "blas.h"
			#include "math.h"

			/*
			 * Element-wise accumulate a second feature map into `out`, in place
			 * (out[...] += add[...]).
			 *
			 * out    - destination buffer, indexed as i + w*(j + h*(k + c*b)),
			 *          i.e. `batch` images of `c` channels of h rows by w columns.
			 * w,h,c  - width, height and channel count of `out`.
			 * batch  - number of images in both buffers.
			 * sample - step between the `out` pixels that receive a value; only
			 *          w/sample by h/sample positions per channel are touched.
			 * add    - source buffer; its rows are w*stride/sample wide and it holds
			 *          h*stride/sample rows per channel and `c2` channels per image.
			 * stride - step between the `add` pixels that are read.
			 * c2     - channel count of `add`.
			 *
			 * Only the first min(c, c2) channels are combined; any extra channels in
			 * either buffer are left alone. All divisions are integer divisions.
			 */
			void shortcut_cpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2)
			{
			    int i,j,k,b;
			    for(b = 0; b < batch; ++b){
			        /* stop at the smaller channel count of the two maps */
			        for(k = 0; k < c && k < c2; ++k){
			            for(j = 0; j < h/sample; ++j){
			                for(i = 0; i < w/sample; ++i){
			                    int out_index = i*sample + w*(j*sample + h*(k + c*b));
			                    /* first term: offset of image b inside `add`; the rest addresses (k, j, i) within it */
			                    int add_index = b*w*stride/sample*h*stride/sample*c2 + i*stride + w*stride/sample*(j*stride + h*stride/sample*k);
			                    out[out_index] += add[add_index];
			                }
			            }
			        }
			    }
			}

			void mean_cpu(float x, int batch, int filters, int spatial, float mean)
			{
			float scale = 1./(batch * spatial);

			@@ -15,6 +15,7 @@
			/* Buffer arguments (X, Y, out, add, x, mean, variance) are pointers to float arrays. */
			void scal_cpu(int N, float ALPHA, float *X, int INCX);
			float dot_cpu(int N, float *X, int INCX, float *Y, int INCY);
			void test_gpu_blas();
			/* Adds `add` into `out` in place over the first min(c, c2) channels. */
			void shortcut_cpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2);

			void mean_cpu(float *x, int batch, int filters, int spatial, float *mean);
			void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
			@@ -43,5 +44,6 @@

			/* Buffer arguments (x, mean, variance, out, add) are pointers to float arrays. */
			void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance);
			void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean);
			/* GPU counterpart of shortcut_cpu: adds `add` into `out` in place. */
			void shortcut_gpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2);
			#endif
			#endif

			@@ -228,6 +228,7 @@
			if(i < N) Y[iINCY] = X[i*INCX];
			}


			extern "C" void normalize_gpu(float x, float mean, float *variance, int batch, int filters, int spatial)
			{
			size_t N = batchfiltersspatial;
			@@ -372,3 +373,27 @@
			fill_kernel<<<cuda_gridsize(N), BLOCK>>>(N, ALPHA, X, INCX);
			check_error(cudaPeekAtLastError());
			}

			/*
			 * CUDA kernel: each thread adds one element of `add` into `out`.
			 *
			 * size  - total number of elements to process; threads whose flat id is
			 *         >= size return immediately.
			 * min_c - number of channels to combine (the caller passes min(c, c2)).
			 * The remaining parameters have the same meaning as in shortcut_cpu.
			 *
			 * The flat thread id is decomposed, fastest-varying first, into
			 * column i, row j, channel k and batch index b.
			 */
			__global__ void shortcut_kernel(int size, float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2, int min_c)
			{
			    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
			    if (id >= size) return;
			    int i = id % (w/sample);
			    id /= (w/sample);
			    int j = id % (h/sample);
			    id /= (h/sample);
			    int k = id % min_c;
			    id /= min_c;
			    int b = id;
			    int out_index = i*sample + w*(j*sample + h*(k + c*b));
			    /* first term: offset of image b inside `add`; the rest addresses (k, j, i) within it */
			    int add_index = b*w*stride/sample*h*stride/sample*c2 + i*stride + w*stride/sample*(j*stride + h*stride/sample*k);
			    out[out_index] += add[add_index];
			}

			/*
			 * Host-side launcher for shortcut_kernel.
			 *
			 * Combines the first min(c, c2) channels of `add` into `out`, launching
			 * one thread per element to be updated, then checks for a launch error.
			 * `out` and `add` are handed straight to the kernel, so they are
			 * presumably device pointers - confirm against callers.
			 */
			extern "C" void shortcut_gpu(float *out, int w, int h, int c, int batch, int sample, float *add, int stride, int c2)
			{
			    int min_c = (c < c2) ? c : c2;
			    /* evaluated left to right with integer division */
			    int size = batch * w/sample * h/sample * min_c;
			    shortcut_kernel<<<cuda_gridsize(size), BLOCK>>>(size, out, w, h, c, batch, sample, add, stride, c2, min_c);
			    check_error(cudaPeekAtLastError());
			}

			@@ -131,7 +131,7 @@
			char *label_list = option_find_str(options, "labels", "data/labels.list");
			char *valid_list = option_find_str(options, "valid", "data/train.list");
			int classes = option_find_int(options, "classes", 2);
			int topk = option_find_int(options, "topk", 1);
			int topk = option_find_int(options, "top", 1);

			char **labels = get_labels(label_list);
			list *plist = get_paths(valid_list);
			@@ -194,11 +194,12 @@

			list *options = read_data_cfg(datacfg);

			char *label_list = option_find_str(options, "labels", "data/labels.list");
			char *name_list = option_find_str(options, "names", 0);
			if(!name_list) name_list = option_find_str(options, "labels", "data/labels.list");
			int top = option_find_int(options, "top", 1);

			int i = 0;
			char **names = get_labels(label_list);
			char **names = get_labels(name_list);
			clock_t time;
			int indexes[10];
			char buff[256];

			@@ -25,13 +25,13 @@
			l.weight_updates = calloc(inputs*outputs, sizeof(float));
			l.bias_updates = calloc(outputs, sizeof(float));

			l.weights = calloc(inputs*outputs, sizeof(float));
			l.weights = calloc(outputs*inputs, sizeof(float));
			l.biases = calloc(outputs, sizeof(float));


			//float scale = 1./sqrt(inputs);
			float scale = sqrt(2./inputs);
			for(i = 0; i < inputs*outputs; ++i){
			for(i = 0; i < outputs*inputs; ++i){
			l.weights[i] = 2scalerand_uniform() - scale;
			}

			@@ -40,10 +40,10 @@
			}

			#ifdef GPU
			l.weights_gpu = cuda_make_array(l.weights, inputs*outputs);
			l.weights_gpu = cuda_make_array(l.weights, outputs*inputs);
			l.biases_gpu = cuda_make_array(l.biases, outputs);

			l.weight_updates_gpu = cuda_make_array(l.weight_updates, inputs*outputs);
			l.weight_updates_gpu = cuda_make_array(l.weight_updates, outputs*inputs);
			l.bias_updates_gpu = cuda_make_array(l.bias_updates, outputs);

			l.output_gpu = cuda_make_array(l.output, outputs*batch);
			@@ -76,7 +76,7 @@
			float *a = state.input;
			float *b = l.weights;
			float *c = l.output;
			gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
			gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
			activate_array(l.output, l.outputs*l.batch, l.activation);
			}

			@@ -87,11 +87,11 @@
			for(i = 0; i < l.batch; ++i){
			axpy_cpu(l.outputs, 1, l.delta + i*l.outputs, 1, l.bias_updates, 1);
			}
			int m = l.inputs;
			int m = l.outputs;
			int k = l.batch;
			int n = l.outputs;
			float *a = state.input;
			float *b = l.delta;
			int n = l.inputs;
			float *a = l.delta;
			float *b = state.input;
			float *c = l.weight_updates;
			gemm(1,0,m,n,k,1,a,m,b,n,1,c,n);

			@@ -103,7 +103,7 @@
			b = l.weights;
			c = state.delta;

			if(c) gemm(0,1,m,n,k,1,a,k,b,k,1,c,n);
			if(c) gemm(0,0,m,n,k,1,a,k,b,n,1,c,n);
			}

			#ifdef GPU
			@@ -146,7 +146,7 @@
			float * a = state.input;
			float * b = l.weights_gpu;
			float * c = l.output_gpu;
			gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
			gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
			activate_array_ongpu(l.output_gpu, l.outputs*l.batch, l.activation);

			/*
			@@ -163,11 +163,11 @@
			for(i = 0; i < l.batch; ++i){
			axpy_ongpu_offset(l.outputs, 1, l.delta_gpu, i*l.outputs, 1, l.bias_updates_gpu, 0, 1);
			}
			int m = l.inputs;
			int m = l.outputs;
			int k = l.batch;
			int n = l.outputs;
			float * a = state.input;
			float * b = l.delta_gpu;
			int n = l.inputs;
			float * a = l.delta_gpu;
			float * b = state.input;
			float * c = l.weight_updates_gpu;
			gemm_ongpu(1,0,m,n,k,1,a,m,b,n,1,c,n);

			@@ -179,6 +179,6 @@
			b = l.weights_gpu;
			c = state.delta;

			if(c) gemm_ongpu(0,1,m,n,k,1,a,k,b,k,1,c,n);
			if(c) gemm_ongpu(0,0,m,n,k,1,a,k,b,n,1,c,n);
			}
			#endif

			@@ -2,8 +2,8 @@
			#define CONNECTED_LAYER_H

			#include "activations.h"
			#include "params.h"
			#include "layer.h"
			#include "network.h"

			typedef layer connected_layer;

			@@ -6,6 +6,7 @@
			#include "image.h"
			#include "activations.h"
			#include "layer.h"
			#include "network.h"

			typedef layer convolutional_layer;

			@@ -1,7 +1,7 @@
			#ifndef COST_LAYER_H
			#define COST_LAYER_H
			#include "params.h"
			#include "layer.h"
			#include "network.h"

			typedef layer cost_layer;

			@@ -4,6 +4,7 @@
			#include "image.h"
			#include "params.h"
			#include "layer.h"
			#include "network.h"

			typedef layer crop_layer;

			@@ -67,6 +67,7 @@
			status = cudaMemcpy(x_gpu, x, size, cudaMemcpyHostToDevice);
			check_error(status);
			}
			if(!x_gpu) error("Cuda malloc failed\n");
			return x_gpu;
			}

			@@ -149,6 +149,43 @@
			}
			}

			/*
			 * Fill a ground-truth vector with up to 30 labelled boxes for one image.
			 *
			 * path    - image path; the label path is derived from it by replacing
			 *           "images"/"JPEGImages" with "labels" and the .jpg/.JPG/.JPEG
			 *           extension with ".txt".
			 * truth   - output buffer; box i occupies (4 + classes) consecutive floats:
			 *           x, y, w, h followed by a one-hot class vector.
			 *           NOTE(review): slots are only written, never cleared, so the
			 *           caller is assumed to pass a zeroed buffer - confirm.
			 * classes - number of classes; ids >= classes get no class entry.
			 * flip, dx, dy, sx, sy - forwarded unchanged to correct_boxes().
			 */
			void fill_truth_swag(char *path, float *truth, int classes, int flip, float dx, float dy, float sx, float sy)
			{
			    char *labelpath = find_replace(path, "images", "labels");
			    labelpath = find_replace(labelpath, "JPEGImages", "labels");

			    labelpath = find_replace(labelpath, ".jpg", ".txt");
			    labelpath = find_replace(labelpath, ".JPG", ".txt");
			    labelpath = find_replace(labelpath, ".JPEG", ".txt");
			    int count = 0;
			    box_label *boxes = read_boxes(labelpath, &count);
			    randomize_boxes(boxes, count);
			    correct_boxes(boxes, count, dx, dy, sx, sy, flip);
			    float x,y,w,h;
			    int id;
			    int i;

			    /* at most 30 boxes are stored, whatever the label file contains */
			    for (i = 0; i < count && i < 30; ++i) {
			        x = boxes[i].x;
			        y = boxes[i].y;
			        w = boxes[i].w;
			        h = boxes[i].h;
			        id = boxes[i].id;

			        /* skip boxes with a negative extent; their slot stays untouched */
			        if (w < .0 || h < .0) continue;

			        int index = (4+classes) * i;

			        truth[index++] = x;
			        truth[index++] = y;
			        truth[index++] = w;
			        truth[index++] = h;

			        /* index now points at the start of the class vector */
			        if (id < classes) truth[index+id] = 1;
			    }
			    free(boxes);
			}

			void fill_truth_region(char path, float truth, int classes, int num_boxes, int flip, float dx, float dy, float sx, float sy)
			{
			char *labelpath = find_replace(path, "images", "labels");
			@@ -482,6 +519,59 @@
			return d;
			}

			/*
			 * Load one randomly chosen image with jittered cropping and its box labels.
			 *
			 * paths   - array of image paths to choose from.
			 * n       - number of entries in `paths`; used as a modulus, so it must be
			 *           non-zero.
			 * classes - number of classes, used to size and fill the truth row.
			 * jitter  - fraction of the image width/height by which each crop edge may
			 *           move, in either direction.
			 *
			 * Returns a `data` with a single row: X holds the cropped image resized back
			 * to the original w x h (h*w*3 values), y holds (4 + classes) * 30 floats
			 * filled by fill_truth_swag(). d.w and d.h record the image size.
			 */
			data load_data_swag(char **paths, int n, int classes, float jitter)
			{
			    int index = rand_r(&data_seed)%n;
			    char *random_path = paths[index];

			    image orig = load_image_color(random_path, 0, 0);
			    int h = orig.h;
			    int w = orig.w;

			    data d;
			    d.shallow = 0;
			    d.w = w;
			    d.h = h;

			    d.X.rows = 1;
			    d.X.vals = calloc(d.X.rows, sizeof(float*));
			    d.X.cols = h*w*3;

			    /* room for 30 boxes of 4 coordinates plus a one-hot class vector each */
			    int k = (4+classes)*30;
			    d.y = make_matrix(1, k);

			    int dw = w*jitter;
			    int dh = h*jitter;

			    /* random offset for each crop edge, computed as rand_uniform()*2*d - d */
			    int pleft = (rand_uniform() * 2*dw - dw);
			    int pright = (rand_uniform() * 2*dw - dw);
			    int ptop = (rand_uniform() * 2*dh - dh);
			    int pbot = (rand_uniform() * 2*dh - dh);

			    int swidth = w - pleft - pright;
			    int sheight = h - ptop - pbot;

			    /* crop size relative to the original image */
			    float sx = (float)swidth / w;
			    float sy = (float)sheight / h;

			    int flip = rand_r(&data_seed)%2;
			    image cropped = crop_image(orig, pleft, ptop, swidth, sheight);

			    /* crop origin expressed in units of the crop size */
			    float dx = ((float)pleft/w)/sx;
			    float dy = ((float)ptop /h)/sy;

			    image sized = resize_image(cropped, w, h);
			    if(flip) flip_image(sized);
			    /* d takes over sized.data, which is why `sized` is not freed below */
			    d.X.vals[0] = sized.data;

			    fill_truth_swag(random_path, d.y.vals[0], classes, flip, dx, dy, 1./sx, 1./sy);

			    free_image(orig);
			    free_image(cropped);

			    return d;
			}

			data load_data_detection(int n, char **paths, int m, int classes, int w, int h, int num_boxes, int background)
			{
			char **random_paths = get_random_paths(paths, n, m);
			@@ -559,6 +649,8 @@
			*a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h);
			} else if (a.type == REGION_DATA){
			*a.d = load_data_region(a.n, a.paths, a.m, a.w, a.h, a.num_boxes, a.classes, a.jitter);
			} else if (a.type == SWAG_DATA){
			*a.d = load_data_swag(a.paths, a.n, a.classes, a.jitter);
			} else if (a.type == COMPARE_DATA){
			*a.d = load_data_compare(a.n, a.paths, a.m, a.classes, a.w, a.h);
			} else if (a.type == IMAGE_DATA){

			@@ -20,13 +20,14 @@
			}

			typedef struct{
			int w, h;
			matrix X;
			matrix y;
			int shallow;
			} data;

			typedef enum {
			CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA
			CLASSIFICATION_DATA, DETECTION_DATA, CAPTCHA_DATA, REGION_DATA, IMAGE_DATA, COMPARE_DATA, WRITING_DATA, SWAG_DATA
			} data_type;

			typedef struct load_args{

			@@ -146,7 +146,7 @@
			}
			float iou = box_iou(out, truth);

			//printf("%d", best_index);
			//printf("%d,", best_index);
			int p_index = index + locationsl.classes + il.n + best_index;
			(l.cost) -= l.noobject_scale pow(l.output[p_index], 2);
			(l.cost) += l.object_scale pow(1-l.output[p_index], 2);

			@@ -1,8 +1,8 @@
			#ifndef REGION_LAYER_H
			#define REGION_LAYER_H

			#include "params.h"
			#include "layer.h"
			#include "network.h"

			typedef layer detection_layer;

			@@ -3,6 +3,7 @@

			#include "params.h"
			#include "layer.h"
			#include "network.h"

			typedef layer dropout_layer;

			@@ -1,5 +1,6 @@
			#include "image.h"
			#include "utils.h"
			#include "blas.h"
			#include <stdio.h>
			#include <math.h>

			@@ -708,8 +709,14 @@
			image exp5 = copy_image(im);
			exposure_image(exp5, .5);

			image r = resize_image(im, im.w/2, im.h/2);

			image black = make_image(im.w, im.h, im.c);
			shortcut_cpu(black.data, im.w, im.h, im.c, 1, 2, r.data, 1, r.c);

			show_image(im, "Original");
			show_image(gray, "Gray");
			show_image(black, "Black");
			show_image(sat2, "Saturation-2");
			show_image(sat5, "Saturation-.5");
			show_image(exp2, "Exposure-2");

			@@ -3,6 +3,9 @@

			#include "activations.h"

			/* Forward declaration so that `layer` can be named before the full struct definition later in this header. */
			struct layer;
			typedef struct layer layer;

			typedef enum {
			CONVOLUTIONAL,
			DECONVOLUTIONAL,
			@@ -16,20 +19,22 @@
			COST,
			NORMALIZATION,
			AVGPOOL,
			LOCAL
			LOCAL,
			SHORTCUT
			} LAYER_TYPE;

			/* Cost variants a layer can carry; stored in the cost_type field of struct layer. */
			typedef enum{
			SSE, MASKED
			} COST_TYPE;

			typedef struct {
			struct layer{
			LAYER_TYPE type;
			ACTIVATION activation;
			COST_TYPE cost_type;
			int batch_normalize;
			int batch;
			int forced;
			int flipped;
			int inputs;
			int outputs;
			int truths;
			@@ -45,6 +50,7 @@
			int crop_height;
			int sqrt;
			int flip;
			int index;
			float angle;
			float jitter;
			float saturation;
			@@ -144,7 +150,7 @@
			float * squared_gpu;
			float * norms_gpu;
			#endif
			} layer;
			};

			void free_layer(layer);

			@@ -2,10 +2,10 @@
			#define LOCAL_LAYER_H

			#include "cuda.h"
			#include "params.h"
			#include "image.h"
			#include "activations.h"
			#include "layer.h"
			#include "network.h"

			typedef layer local_layer;

			@@ -5,6 +5,7 @@
			#include "params.h"
			#include "cuda.h"
			#include "layer.h"
			#include "network.h"

			typedef layer maxpool_layer;

			@@ -19,6 +19,7 @@
			#include "softmax_layer.h"
			#include "dropout_layer.h"
			#include "route_layer.h"
			#include "shortcut_layer.h"

			int get_current_batch(network net)
			{
			@@ -94,6 +95,8 @@
			return "cost";
			case ROUTE:
			return "route";
			case SHORTCUT:
			return "shortcut";
			case NORMALIZATION:
			return "normalization";
			default:
			@@ -119,6 +122,7 @@
			{
			int i;
			for(i = 0; i < net.n; ++i){
			state.index = i;
			layer l = net.layers[i];
			if(l.delta){
			scal_cpu(l.outputs * l.batch, 0, l.delta, 1);
			@@ -149,6 +153,8 @@
			forward_dropout_layer(l, state);
			} else if(l.type == ROUTE){
			forward_route_layer(l, net);
			} else if(l.type == SHORTCUT){
			forward_shortcut_layer(l, state);
			}
			state.input = l.output;
			}
			@@ -211,6 +217,7 @@
			float *original_input = state.input;
			float *original_delta = state.delta;
			for(i = net.n-1; i >= 0; --i){
			state.index = i;
			if(i == 0){
			state.input = original_input;
			state.delta = original_delta;
			@@ -244,6 +251,8 @@
			backward_cost_layer(l, state);
			} else if(l.type == ROUTE){
			backward_route_layer(l, net);
			} else if(l.type == SHORTCUT){
			backward_shortcut_layer(l, state);
			}
			}
			}
			@@ -255,6 +264,8 @@
			if(gpu_index >= 0) return train_network_datum_gpu(net, x, y);
			#endif
			network_state state;
			state.index = 0;
			state.net = net;
			state.input = x;
			state.delta = 0;
			state.truth = y;
			@@ -307,6 +318,8 @@
			{
			int i,j;
			network_state state;
			state.index = 0;
			state.net = net;
			state.train = 1;
			state.delta = 0;
			float sum = 0;
			@@ -443,6 +456,8 @@
			#endif

			network_state state;
			state.net = net;
			state.index = 0;
			state.input = input;
			state.truth = 0;
			state.train = 0;

			@@ -3,15 +3,15 @@
			#define NETWORK_H

			#include "image.h"
			#include "detection_layer.h"
			#include "layer.h"
			#include "data.h"
			#include "params.h"

			/*
			 * Identifiers for the available learning-rate policies.
			 * NOTE(review): the code that interprets each policy is not visible here -- confirm semantics there.
			 */
			typedef enum {
			CONSTANT, STEP, EXP, POLY, STEPS, SIG
			} learning_rate_policy;

			typedef struct {
			typedef struct network{
			int n;
			int batch;
			int *seen;
			@@ -43,6 +43,15 @@
			#endif
			} network;

			/* State threaded through a forward/backward pass and handed to the per-layer routines. */
			typedef struct network_state {
			float *truth; /* target values supplied by the training callers; set to 0 when predicting */
			float *input; /* input for the current layer; the forward loop re-points it at each layer's output */
			float *delta; /* delta buffer used by the backward pass; the callers start it at 0 */
			int train;    /* set to 1 by the training path and 0 by the prediction path */
			int index;    /* index of the layer currently being processed (set at the top of each loop iteration) */
			network net;  /* the network being run, held by value */
			} network_state;

			#ifdef GPU
			float train_network_datum_gpu(network net, float x, float y);
			float network_predict_gpu(network net, float input);

			@@ -27,6 +27,7 @@
			#include "softmax_layer.h"
			#include "dropout_layer.h"
			#include "route_layer.h"
			#include "shortcut_layer.h"
			#include "blas.h"
			}

			@@ -38,6 +39,7 @@
			{
			int i;
			for(i = 0; i < net.n; ++i){
			state.index = i;
			layer l = net.layers[i];
			if(l.delta_gpu){
			fill_ongpu(l.outputs * l.batch, 0, l.delta_gpu, 1);
			@@ -68,6 +70,8 @@
			forward_dropout_layer_gpu(l, state);
			} else if(l.type == ROUTE){
			forward_route_layer_gpu(l, net);
			} else if(l.type == SHORTCUT){
			forward_shortcut_layer_gpu(l, state);
			}
			state.input = l.output_gpu;
			}
			@@ -79,6 +83,7 @@
			float * original_input = state.input;
			float * original_delta = state.delta;
			for(i = net.n-1; i >= 0; --i){
			state.index = i;
			layer l = net.layers[i];
			if(i == 0){
			state.input = original_input;
			@@ -112,6 +117,8 @@
			backward_cost_layer_gpu(l, state);
			} else if(l.type == ROUTE){
			backward_route_layer_gpu(l, net);
			} else if(l.type == SHORTCUT){
			backward_shortcut_layer_gpu(l, state);
			}
			}
			}
			@@ -138,6 +145,8 @@
			float train_network_datum_gpu(network net, float x, float y)
			{
			network_state state;
			state.index = 0;
			state.net = net;
			int x_size = get_network_input_size(net)*net.batch;
			int y_size = get_network_output_size(net)*net.batch;
			if(net.layers[net.n-1].type == DETECTION) y_size = net.layers[net.n-1].truths*net.batch;
			@@ -178,6 +187,8 @@
			{
			int size = get_network_input_size(net) * net.batch;
			network_state state;
			state.index = 0;
			state.net = net;
			state.input = cuda_make_array(input, size);
			state.truth = 0;
			state.train = 0;

			@@ -3,7 +3,7 @@

			#include "image.h"
			#include "layer.h"
			#include "params.h"
			#include "network.h"

			layer make_normalization_layer(int batch, int w, int h, int c, int size, float alpha, float beta, float kappa);
			void resize_normalization_layer(layer *layer, int h, int w);

			@@ -1,12 +1 @@
			#ifndef PARAMS_H
			#define PARAMS_H

			typedef struct {
			float *truth;
			float *input;
			float *delta;
			int train;
			} network_state;

			#endif

			@@ -17,6 +17,7 @@
			#include "avgpool_layer.h"
			#include "local_layer.h"
			#include "route_layer.h"
			#include "shortcut_layer.h"
			#include "list.h"
			#include "option_list.h"
			#include "utils.h"
			@@ -37,6 +38,7 @@
			int is_softmax(section *s);
			int is_normalization(section *s);
			int is_crop(section *s);
			int is_shortcut(section *s);
			int is_cost(section *s);
			int is_detection(section *s);
			int is_route(section *s);
			@@ -80,6 +82,7 @@
			int h;
			int w;
			int c;
			int index;
			} size_params;

			deconvolutional_layer parse_deconvolutional(list *options, size_params params)
			@@ -148,6 +151,7 @@
			int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);

			convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation, batch_normalize);
			layer.flipped = option_find_int_quiet(options, "flipped", 0);

			char *weights = option_find_str(options, "weights", 0);
			char *biases = option_find_str(options, "biases", 0);
			@@ -287,6 +291,20 @@
			return l;
			}

			/*
			 * Build a shortcut layer from the options of a [shortcut] section.
			 *
			 * options: key/value options of the section; "from" selects the source layer.
			 * params:  running size information; params.index is the position of the
			 *          layer being parsed, params.w/h/c/batch describe its input.
			 * net:     the network under construction; layers before params.index are
			 *          already filled in.
			 *
			 * A negative "from" value is relative to the current layer
			 * (params.index + from). The resolved index must refer to a layer that has
			 * already been parsed; otherwise an error is printed and the process exits.
			 *
			 * Returns the layer produced by make_shortcut_layer().
			 */
			layer parse_shortcut(list *options, size_params params, network net)
			{
			    char *from_opt = option_find(options, "from");
			    /* Guard against a missing option: atoi() on a null pointer is undefined. */
			    if(!from_opt){
			        fprintf(stderr, "Shortcut layer must specify a 'from' layer\n");
			        exit(EXIT_FAILURE);
			    }

			    int index = atoi(from_opt);
			    if(index < 0) index = params.index + index;

			    /* Only layers parsed before this one exist in net.layers. */
			    if(index < 0 || index >= params.index){
			        fprintf(stderr, "Shortcut layer %d: 'from' resolves to invalid layer %d\n", params.index, index);
			        exit(EXIT_FAILURE);
			    }

			    int batch = params.batch;
			    layer from = net.layers[index];

			    layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);
			    return s;
			}


			route_layer parse_route(list *options, size_params params, network net)
			{
			char *l = option_find(options, "layers");
			@@ -303,6 +321,7 @@
			for(i = 0; i < n; ++i){
			int index = atoi(l);
			l = strchr(l, ',')+1;
			if(index < 0) index = params.index + index;
			layers[i] = index;
			sizes[i] = net.layers[index].outputs;
			}
			@@ -419,6 +438,7 @@
			int count = 0;
			free_section(s);
			while(n){
			params.index = count;
			fprintf(stderr, "%d: ", count);
			s = (section *)n->val;
			options = s->options;
			@@ -447,6 +467,8 @@
			l = parse_avgpool(options, params);
			}else if(is_route(s)){
			l = parse_route(options, params, net);
			}else if(is_shortcut(s)){
			l = parse_shortcut(options, params, net);
			}else if(is_dropout(s)){
			l = parse_dropout(options, params);
			l.output = net.layers[count-1].output;
			@@ -464,13 +486,13 @@
			net.layers[count] = l;
			free_section(s);
			n = n->next;
			++count;
			if(n){
			params.h = l.out_h;
			params.w = l.out_w;
			params.c = l.out_c;
			params.inputs = l.outputs;
			}
			++count;
			}
			free_list(sections);
			net.outputs = get_network_output_size(net);
			@@ -478,6 +500,10 @@
			return net;
			}

			/* Return 1 when the section's type string is exactly "[shortcut]", 0 otherwise. */
			int is_shortcut(section *s)
			{
			    int mismatch = strcmp(s->type, "[shortcut]");
			    return mismatch == 0;
			}
			int is_crop(section *s)
			{
			return (strcmp(s->type, "[crop]")==0);
			@@ -625,9 +651,12 @@
			FILE *fp = fopen(filename, "w");
			if(!fp) file_error(filename);

			fwrite(&net.learning_rate, sizeof(float), 1, fp);
			fwrite(&net.momentum, sizeof(float), 1, fp);
			fwrite(&net.decay, sizeof(float), 1, fp);
			int major = 0;
			int minor = 1;
			int revision = 0;
			fwrite(&major, sizeof(int), 1, fp);
			fwrite(&minor, sizeof(int), 1, fp);
			fwrite(&revision, sizeof(int), 1, fp);
			fwrite(net.seen, sizeof(int), 1, fp);

			int i;
			@@ -674,6 +703,19 @@
			save_weights_upto(net, filename, net.n);
			}

			/*
			 * Transpose a row-major matrix in place.
			 *
			 * a:    buffer of rows*cols floats holding a rows x cols matrix; on return
			 *       it holds the cols x rows transpose in the same storage.
			 * rows: number of rows of the input matrix.
			 * cols: number of columns of the input matrix.
			 *
			 * Non-positive dimensions leave the buffer untouched. A scratch copy of the
			 * whole matrix is allocated; if that allocation fails an error is printed
			 * and the process exits.
			 */
			void transpose_matrix(float *a, int rows, int cols)
			{
			    if(rows <= 0 || cols <= 0) return;

			    /* Size arithmetic in size_t so large matrices do not overflow int. */
			    size_t count = (size_t)rows * (size_t)cols;
			    float *transpose = calloc(count, sizeof(float));
			    if(!transpose){
			        fprintf(stderr, "transpose_matrix: out of memory\n");
			        exit(EXIT_FAILURE);
			    }

			    int x, y;
			    for(x = 0; x < rows; ++x){
			        for(y = 0; y < cols; ++y){
			            /* element (x, y) of the source becomes element (y, x) of the result */
			            transpose[(size_t)y*rows + x] = a[(size_t)x*cols + y];
			        }
			    }
			    memcpy(a, transpose, count*sizeof(float));
			    free(transpose);
			}

			void load_weights_upto(network net, char filename, int cutoff)
			{
			fprintf(stderr, "Loading weights from %s...", filename);
			@@ -681,10 +723,12 @@
			FILE *fp = fopen(filename, "r");
			if(!fp) file_error(filename);

			float garbage;
			fread(&garbage, sizeof(float), 1, fp);
			fread(&garbage, sizeof(float), 1, fp);
			fread(&garbage, sizeof(float), 1, fp);
			int major;
			int minor;
			int revision;
			fread(&major, sizeof(int), 1, fp);
			fread(&minor, sizeof(int), 1, fp);
			fread(&revision, sizeof(int), 1, fp);
			fread(net->seen, sizeof(int), 1, fp);

			int i;
			@@ -700,6 +744,9 @@
			fread(l.rolling_variance, sizeof(float), l.n, fp);
			}
			fread(l.filters, sizeof(float), num, fp);
			if (l.flipped) {
			transpose_matrix(l.filters, l.cl.sizel.size, l.n);
			}
			#ifdef GPU
			if(gpu_index >= 0){
			push_convolutional_layer(l);
			@@ -719,6 +766,9 @@
			if(l.type == CONNECTED){
			fread(l.biases, sizeof(float), l.outputs, fp);
			fread(l.weights, sizeof(float), l.outputs*l.inputs, fp);
			if(major > 1000 \|\| minor > 1000){
			transpose_matrix(l.weights, l.inputs, l.outputs);
			}
			#ifdef GPU
			if(gpu_index >= 0){
			push_connected_layer(l);