AlexeyAB
2018-03-27 d9ae3dd681ed1c98e807ff937dbbb9cfc4d19fe0
Added Yolo v3
27 files modified
10 files added
37 files changed, 3689 lines:
Makefile 2
build/darknet/darknet.vcxproj 4
build/darknet/darknet_no_gpu.vcxproj 4
build/darknet/x64/cfg/yolov3.cfg 789
build/darknet/x64/darknet_demo_mjpeg_stream.cmd 2
build/darknet/x64/darknet_yolo_v3.cmd 5
build/darknet/x64/yolov3.cfg 789
build/darknet/yolo_cpp_dll.vcxproj 4
build/darknet/yolo_cpp_dll_no_gpu.vcxproj 4
cfg/yolov3.cfg 789
image_yolov3.sh 6
src/blas.c 16
src/blas.h 3
src/blas_kernels.cu 31
src/box.c 86
src/box.h 11
src/demo.c 18
src/detection_layer.c 28
src/detection_layer.h 1
src/detector.c 37
src/image.c 178
src/image.h 1
src/layer.h 8
src/network.c 102
src/network.h 1
src/parser.c 76
src/region_layer.c 119
src/region_layer.h 1
src/tree.c 32
src/tree.h 2
src/upsample_layer.c 106
src/upsample_layer.h 17
src/utils.c 9
src/utils.h 1
src/yolo_layer.c 381
src/yolo_layer.h 20
video_yolov3.sh 6
Makefile
@@ -85,7 +85,7 @@
endif
endif
OBJ=http_stream.o gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o
OBJ=http_stream.o gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o
ifeq ($(GPU), 1) 
LDFLAGS+= -lstdc++ 
OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
build/darknet/darknet.vcxproj
@@ -227,10 +227,12 @@
    <ClCompile Include="..\..\src\swag.c" />
    <ClCompile Include="..\..\src\tag.c" />
    <ClCompile Include="..\..\src\tree.c" />
    <ClCompile Include="..\..\src\upsample_layer.c" />
    <ClCompile Include="..\..\src\utils.c" />
    <ClCompile Include="..\..\src\voxel.c" />
    <ClCompile Include="..\..\src\writing.c" />
    <ClCompile Include="..\..\src\yolo.c" />
    <ClCompile Include="..\..\src\yolo_layer.c" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\..\src\activations.h" />
@@ -279,7 +281,9 @@
    <ClInclude Include="..\..\src\stb_image_write.h" />
    <ClInclude Include="..\..\src\tree.h" />
    <ClInclude Include="..\..\src\unistd.h" />
    <ClInclude Include="..\..\src\upsample_layer.h" />
    <ClInclude Include="..\..\src\utils.h" />
    <ClInclude Include="..\..\src\yolo_layer.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
build/darknet/darknet_no_gpu.vcxproj
@@ -224,10 +224,12 @@
    <ClCompile Include="..\..\src\swag.c" />
    <ClCompile Include="..\..\src\tag.c" />
    <ClCompile Include="..\..\src\tree.c" />
    <ClCompile Include="..\..\src\upsample_layer.c" />
    <ClCompile Include="..\..\src\utils.c" />
    <ClCompile Include="..\..\src\voxel.c" />
    <ClCompile Include="..\..\src\writing.c" />
    <ClCompile Include="..\..\src\yolo.c" />
    <ClCompile Include="..\..\src\yolo_layer.c" />
  </ItemGroup>
  <ItemGroup>
    <ClInclude Include="..\..\src\activations.h" />
@@ -276,7 +278,9 @@
    <ClInclude Include="..\..\src\stb_image_write.h" />
    <ClInclude Include="..\..\src\tree.h" />
    <ClInclude Include="..\..\src\unistd.h" />
    <ClInclude Include="..\..\src\upsample_layer.h" />
    <ClInclude Include="..\..\src\utils.h" />
    <ClInclude Include="..\..\src\yolo_layer.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets" />
build/darknet/x64/cfg/yolov3.cfg
New file
@@ -0,0 +1,789 @@
[net]
# Testing
batch=1
subdivisions=1
# Training
# batch=64
# subdivisions=16
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
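A note on the cfg (not part of the commit): each of the three [yolo] heads above selects three of the nine shared anchors via mask=, and the 1x1 convolution feeding it needs filters = masks_per_head * (classes + 4 box coordinates + 1 objectness score). A minimal C check of that arithmetic, assuming the COCO settings in this file:

```c
/* Sanity check, not part of the commit: filters before each [yolo] block
 * must equal masks_per_head * (classes + 4 + 1). */
#include <stdio.h>

int main(void)
{
    int masks_per_head = 3;   /* each [yolo] block lists 3 entries in mask= */
    int classes = 80;         /* classes=80 in this cfg */
    int filters = masks_per_head * (classes + 4 + 1);
    printf("expected filters before [yolo]: %d\n", filters);  /* prints 255 */
    return 0;
}
```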
build/darknet/x64/darknet_demo_mjpeg_stream.cmd
@@ -1,7 +1,7 @@
rem Run this file and then open URL in Chrome/Firefox:
rem http://localhost:8090
rem Or open: http://ip-address:8090
darknet.exe detector demo data/voc.data yolo-voc.cfg yolo-voc.weights test.mp4 -i 0 -http_port 8090
darknet.exe detector demo data/voc.data yolo-voc.cfg yolo-voc.weights test.mp4 -i 0 -http_port 8090 -dont_show
pause
build/darknet/x64/darknet_yolo_v3.cmd
New file
@@ -0,0 +1,5 @@
darknet.exe detector test data/coco.data yolov3.cfg yolov3.weights -i 0 -thresh 0.25 dogr.jpg
pause
build/darknet/x64/yolov3.cfg
New file
@@ -0,0 +1,789 @@
[net]
# Testing
batch=1
subdivisions=1
# Training
# batch=64
# subdivisions=16
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
build/darknet/yolo_cpp_dll.vcxproj
@@ -229,10 +229,12 @@
    <ClCompile Include="..\..\src\swag.c" />
    <ClCompile Include="..\..\src\tag.c" />
    <ClCompile Include="..\..\src\tree.c" />
    <ClCompile Include="..\..\src\upsample_layer.c" />
    <ClCompile Include="..\..\src\utils.c" />
    <ClCompile Include="..\..\src\voxel.c" />
    <ClCompile Include="..\..\src\writing.c" />
    <ClCompile Include="..\..\src\yolo.c" />
    <ClCompile Include="..\..\src\yolo_layer.c" />
    <ClCompile Include="..\..\src\yolo_v2_class.cpp" />
    <ClCompile Include="..\..\src\yolo_v2_class.hpp" />
  </ItemGroup>
@@ -283,7 +285,9 @@
    <ClInclude Include="..\..\src\stb_image_write.h" />
    <ClInclude Include="..\..\src\tree.h" />
    <ClInclude Include="..\..\src\unistd.h" />
    <ClInclude Include="..\..\src\upsample_layer.h" />
    <ClInclude Include="..\..\src\utils.h" />
    <ClInclude Include="..\..\src\yolo_layer.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets">
build/darknet/yolo_cpp_dll_no_gpu.vcxproj
@@ -213,10 +213,12 @@
    <ClCompile Include="..\..\src\swag.c" />
    <ClCompile Include="..\..\src\tag.c" />
    <ClCompile Include="..\..\src\tree.c" />
    <ClCompile Include="..\..\src\upsample_layer.c" />
    <ClCompile Include="..\..\src\utils.c" />
    <ClCompile Include="..\..\src\voxel.c" />
    <ClCompile Include="..\..\src\writing.c" />
    <ClCompile Include="..\..\src\yolo.c" />
    <ClCompile Include="..\..\src\yolo_layer.c" />
    <ClCompile Include="..\..\src\yolo_v2_class.cpp" />
    <ClCompile Include="..\..\src\yolo_v2_class.hpp" />
  </ItemGroup>
@@ -266,7 +268,9 @@
    <ClInclude Include="..\..\src\stb_image_write.h" />
    <ClInclude Include="..\..\src\tree.h" />
    <ClInclude Include="..\..\src\unistd.h" />
    <ClInclude Include="..\..\src\upsample_layer.h" />
    <ClInclude Include="..\..\src\utils.h" />
    <ClInclude Include="..\..\src\yolo_layer.h" />
  </ItemGroup>
  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
  <ImportGroup Label="ExtensionTargets" />
cfg/yolov3.cfg
New file
@@ -0,0 +1,789 @@
[net]
# Testing
batch=1
subdivisions=1
# Training
# batch=64
# subdivisions=16
width=416
height=416
channels=3
momentum=0.9
decay=0.0005
angle=0
saturation = 1.5
exposure = 1.5
hue=.1
learning_rate=0.001
burn_in=1000
max_batches = 500200
policy=steps
steps=400000,450000
scales=.1,.1
[convolutional]
batch_normalize=1
filters=32
size=3
stride=1
pad=1
activation=leaky
# Downsample
[convolutional]
batch_normalize=1
filters=64
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=32
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=128
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=64
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=256
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=512
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
# Downsample
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=2
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
filters=1024
size=3
stride=1
pad=1
activation=leaky
[shortcut]
from=-3
activation=linear
######################
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
batch_normalize=1
filters=512
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=1024
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 6,7,8
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 61
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
batch_normalize=1
filters=256
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=512
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 3,4,5
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
[route]
layers = -4
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[upsample]
stride=2
[route]
layers = -1, 36
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
batch_normalize=1
filters=128
size=1
stride=1
pad=1
activation=leaky
[convolutional]
batch_normalize=1
size=3
stride=1
pad=1
filters=256
activation=leaky
[convolutional]
size=1
stride=1
pad=1
filters=255
activation=linear
[yolo]
mask = 0,1,2
anchors = 10,13,  16,30,  33,23,  30,61,  62,45,  59,119,  116,90,  156,198,  373,326
classes=80
num=9
jitter=.3
ignore_thresh = .5
truth_thresh = 1
random=1
image_yolov3.sh
New file
@@ -0,0 +1,6 @@
./darknet detector test ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights data/dog.jpg -i 0 -thresh 0.25
src/blas.c
@@ -291,3 +291,19 @@
    }
}
void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
{
    int i, j, k, b;
    for (b = 0; b < batch; ++b) {
        for (k = 0; k < c; ++k) {
            for (j = 0; j < h*stride; ++j) {
                for (i = 0; i < w*stride; ++i) {
                    int in_index = b*w*h*c + k*w*h + (j / stride)*w + i / stride;
                    int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i;
                    if (forward) out[out_index] = scale*in[in_index];
                    else in[in_index] += scale*out[out_index];
                }
            }
        }
    }
}
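A hedged standalone driver for the new upsample_cpu above (not in the commit; link it against src/blas.c). It upsamples a 2x2 single-channel map by stride 2 in the forward direction and prints the nearest-neighbor result:

```c
/* Hypothetical driver showing how upsample_cpu duplicates each input pixel
 * into a stride x stride block of the output. */
#include <stdio.h>

void upsample_cpu(float *in, int w, int h, int c, int batch,
                  int stride, int forward, float scale, float *out);

int main(void)
{
    float in[4]   = { 1, 2,
                      3, 4 };          /* 2x2, 1 channel, batch 1 */
    float out[16] = { 0 };             /* 4x4 after stride-2 upsampling */
    upsample_cpu(in, 2, 2, 1, 1, /*stride*/2, /*forward*/1, /*scale*/1.0f, out);

    int i, j;
    for (j = 0; j < 4; ++j) {
        for (i = 0; i < 4; ++i) printf("%.0f ", out[j * 4 + i]);
        printf("\n");                  /* rows: 1 1 2 2 / 1 1 2 2 / 3 3 4 4 / 3 3 4 4 */
    }
    return 0;
}
```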
src/blas.h
@@ -36,6 +36,7 @@
void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c);
void softmax(float *input, int n, float temp, float *output, int stride);
void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out);
#ifdef GPU
#include "cuda.h"
@@ -84,5 +85,7 @@
void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out);
void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out);
#endif
#endif
src/blas_kernels.cu
@@ -784,3 +784,34 @@
    check_error(cudaPeekAtLastError());
}
__global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
{
    size_t i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
    if (i >= N) return;
    int out_index = i;
    int out_w = i % (w*stride);
    i = i / (w*stride);
    int out_h = i % (h*stride);
    i = i / (h*stride);
    int out_c = i%c;
    i = i / c;
    int b = i%batch;
    int in_w = out_w / stride;
    int in_h = out_h / stride;
    int in_c = out_c;
    int in_index = b*w*h*c + in_c*w*h + in_h*w + in_w;
    if (forward) out[out_index] += scale * x[in_index];
    else atomicAdd(x + in_index, scale * out[out_index]);
}
extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
{
    size_t size = w*h*c*batch*stride*stride;
    upsample_kernel << <cuda_gridsize(size), BLOCK >> >(size, in, w, h, c, batch, stride, forward, scale, out);
    check_error(cudaPeekAtLastError());
}
src/box.c
@@ -276,6 +276,92 @@
    free(s);
}
int nms_comparator_v3(const void *pa, const void *pb)
{
    detection a = *(detection *)pa;
    detection b = *(detection *)pb;
    float diff = 0;
    if (b.sort_class >= 0) {
        diff = a.prob[b.sort_class] - b.prob[b.sort_class];
    }
    else {
        diff = a.objectness - b.objectness;
    }
    if (diff < 0) return 1;
    else if (diff > 0) return -1;
    return 0;
}
void do_nms_obj_v3(detection *dets, int total, int classes, float thresh)
{
    int i, j, k;
    k = total - 1;
    for (i = 0; i <= k; ++i) {
        if (dets[i].objectness == 0) {
            detection swap = dets[i];
            dets[i] = dets[k];
            dets[k] = swap;
            --k;
            --i;
        }
    }
    total = k + 1;
    for (i = 0; i < total; ++i) {
        dets[i].sort_class = -1;
    }
    qsort(dets, total, sizeof(detection), nms_comparator_v3);
    for (i = 0; i < total; ++i) {
        if (dets[i].objectness == 0) continue;
        box a = dets[i].bbox;
        for (j = i + 1; j < total; ++j) {
            if (dets[j].objectness == 0) continue;
            box b = dets[j].bbox;
            if (box_iou(a, b) > thresh) {
                dets[j].objectness = 0;
                for (k = 0; k < classes; ++k) {
                    dets[j].prob[k] = 0;
                }
            }
        }
    }
}
void do_nms_sort_v3(detection *dets, int total, int classes, float thresh)
{
    int i, j, k;
    k = total - 1;
    for (i = 0; i <= k; ++i) {
        if (dets[i].objectness == 0) {
            detection swap = dets[i];
            dets[i] = dets[k];
            dets[k] = swap;
            --k;
            --i;
        }
    }
    total = k + 1;
    for (k = 0; k < classes; ++k) {
        for (i = 0; i < total; ++i) {
            dets[i].sort_class = k;
        }
        qsort(dets, total, sizeof(detection), nms_comparator_v3);
        for (i = 0; i < total; ++i) {
            //printf("  k = %d, \t i = %d \n", k, i);
            if (dets[i].prob[k] == 0) continue;
            box a = dets[i].bbox;
            for (j = i + 1; j < total; ++j) {
                box b = dets[j].bbox;
                if (box_iou(a, b) > thresh) {
                    dets[j].prob[k] = 0;
                }
            }
        }
    }
}
void do_nms(box *boxes, float **probs, int total, int classes, float thresh)
{
    int i, j, k;
src/box.h
@@ -9,12 +9,23 @@
    float dx, dy, dw, dh;
} dbox;
typedef struct detection {
    box bbox;
    int classes;
    float *prob;
    float *mask;
    float objectness;
    int sort_class;
} detection;
box float_to_box(float *f);
float box_iou(box a, box b);
float box_rmse(box a, box b);
dbox diou(box a, box b);
void do_nms(box *boxes, float **probs, int total, int classes, float thresh);
void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh);
void do_nms_sort_v3(detection *dets, int total, int classes, float thresh);
void do_nms_obj_v3(detection *dets, int total, int classes, float thresh);
box decode_box(box b, box anchor);
box encode_box(box b, box anchor);
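A hedged usage sketch of the new detection struct and do_nms_sort_v3 (not part of the commit; link against src/box.c and have box.h on the include path). It builds two heavily overlapping detections for a single class and lets class-wise NMS suppress the lower-scoring one at the 0.45 IoU threshold used elsewhere in this commit:

```c
#include <stdio.h>
#include <stdlib.h>
#include "box.h"

int main(void)
{
    int classes = 1;
    detection dets[2] = { 0 };
    int i;
    for (i = 0; i < 2; ++i) {
        dets[i].classes = classes;
        dets[i].prob = calloc(classes, sizeof(float));
        dets[i].objectness = 1;
    }

    /* two heavily overlapping boxes for the same class (normalized coords) */
    box a = { .x = 0.50f, .y = 0.5f, .w = 0.4f, .h = 0.4f };
    box b = { .x = 0.52f, .y = 0.5f, .w = 0.4f, .h = 0.4f };
    dets[0].bbox = a; dets[0].prob[0] = 0.9f;
    dets[1].bbox = b; dets[1].prob[0] = 0.6f;

    do_nms_sort_v3(dets, 2, classes, 0.45f);  /* IoU threshold as in detector.c */

    /* the weaker, overlapping detection has its class prob zeroed */
    printf("kept: %.2f, suppressed: %.2f\n", dets[0].prob[0], dets[1].prob[0]);

    for (i = 0; i < 2; ++i) free(dets[i].prob);
    return 0;
}
```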
src/demo.c
@@ -50,6 +50,7 @@
static float *avg;
void draw_detections_cv(IplImage* show_img, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes);
void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes);
void show_image_cv_ipl(IplImage *disp, const char *name);
image get_image_from_stream_resize(CvCapture *cap, int w, int h, IplImage** in_img, int use_webcam);
IplImage* in_img;
@@ -77,7 +78,7 @@
void *detect_in_thread(void *ptr)
{
    float nms = .4;
    float nms = .45;    // 0.4F
    layer l = net.layers[net.n-1];
    float *X = det_s.data;
@@ -88,6 +89,7 @@
    l.output = avg;
    free_image(det_s);
    /*
    if(l.type == DETECTION){
        get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0);
    } else if (l.type == REGION){
@@ -96,6 +98,12 @@
        error("Last layer must produce detections\n");
    }
    if (nms > 0) do_nms(boxes, probs, l.w*l.h*l.n, l.classes, nms);
    */
    int letter = 0;
    int nboxes = 0;
    detection *dets = get_network_boxes(&net, det.w, det.h, demo_thresh, demo_thresh, 0, 1, &nboxes, letter);
    if (nms) do_nms_obj_v3(dets, nboxes, l.classes, nms);
    printf("\033[2J");
    printf("\033[1;1H");
    printf("\nFPS:%.1f\n",fps);
@@ -108,7 +116,9 @@
    demo_index = (demo_index + 1)%FRAMES;
        
    //draw_detections(det, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes);
    draw_detections_cv(det_img, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes);
    draw_detections_cv_v3(det_img, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes);
    //draw_detections_cv(det_img, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes);
    free(dets);
    return 0;
}
@@ -122,7 +132,7 @@
    return (double)time.tv_sec + (double)time.tv_usec * .000001;
}
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes,
void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes,
    int frame_skip, char *prefix, char *out_filename, int http_stream_port, int dont_show)
{
    //skip = frame_skip;
@@ -303,7 +313,7 @@
    }
}
#else
void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, char *out_filename, int http_stream_port, int dont_show)
void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, char *out_filename, int http_stream_port, int dont_show)
{
    fprintf(stderr, "Demo needs OpenCV for webcam images.\n");
}
src/detection_layer.c
@@ -285,3 +285,31 @@
}
#endif
void get_detection_detections(layer l, int w, int h, float thresh, detection *dets)
{
    int i, j, n;
    float *predictions = l.output;
    //int per_cell = 5*num+classes;
    for (i = 0; i < l.side*l.side; ++i) {
        int row = i / l.side;
        int col = i % l.side;
        for (n = 0; n < l.n; ++n) {
            int index = i*l.n + n;
            int p_index = l.side*l.side*l.classes + i*l.n + n;
            float scale = predictions[p_index];
            int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n) * 4;
            box b;
            b.x = (predictions[box_index + 0] + col) / l.side * w;
            b.y = (predictions[box_index + 1] + row) / l.side * h;
            b.w = pow(predictions[box_index + 2], (l.sqrt ? 2 : 1)) * w;
            b.h = pow(predictions[box_index + 3], (l.sqrt ? 2 : 1)) * h;
            dets[index].bbox = b;
            dets[index].objectness = scale;
            for (j = 0; j < l.classes; ++j) {
                int class_index = i*l.classes;
                float prob = scale*predictions[class_index + j];
                dets[index].prob[j] = (prob > thresh) ? prob : 0;
            }
        }
    }
}
src/detection_layer.h
@@ -10,6 +10,7 @@
void forward_detection_layer(const detection_layer l, network_state state);
void backward_detection_layer(const detection_layer l, network_state state);
void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness);
void get_detection_detections(layer l, int w, int h, float thresh, detection *dets);
#ifdef GPU
void forward_detection_layer_gpu(const detection_layer l, network_state state);
src/detector.c
@@ -1000,7 +1000,7 @@
}
#endif // OPENCV
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, int dont_show)
void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, int dont_show)
{
    list *options = read_data_cfg(datacfg);
    char *name_list = option_find_str(options, "names", "data/names.list");
@@ -1017,7 +1017,7 @@
    char buff[256];
    char *input = buff;
    int j;
    float nms=.4;
    float nms=.45;  // 0.4F
    while(1){
        if(filename){
            strncpy(input, filename, 256);
@@ -1030,21 +1030,27 @@
            strtok(input, "\n");
        }
        image im = load_image_color(input,0,0);
        image sized = resize_image(im, net.w, net.h);
        //image sized = letterbox_image(im, net.w, net.h);
        int letter = 0;
        //image sized = resize_image(im, net.w, net.h);
        image sized = letterbox_image(im, net.w, net.h); letter = 1;
        layer l = net.layers[net.n-1];
        box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
        float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
        for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
        //box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
        //float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
        //for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *));
        float *X = sized.data;
        time=clock();
        network_predict(net, X);
        printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time));
        get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0);
        if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
        //get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0);
        // if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms);
        //draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes);
        int nboxes = 0;
        detection *dets = get_network_boxes(&net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes, letter);
        if (nms) do_nms_sort_v3(dets, nboxes, l.classes, nms);
        draw_detections_v3(im, dets, nboxes, thresh, names, alphabet, l.classes);
        free_detections(dets, nboxes);
        save_image(im, "predictions");
        if (!dont_show) {
            show_image(im, "predictions");
@@ -1052,8 +1058,8 @@
        free_image(im);
        free_image(sized);
        free(boxes);
        free_ptrs((void **)probs, l.w*l.h*l.n);
        //free(boxes);
        //free_ptrs((void **)probs, l.w*l.h*l.n);
#ifdef OPENCV
        if (!dont_show) {
            cvWaitKey(0);
@@ -1071,7 +1077,8 @@
    int http_stream_port = find_int_arg(argc, argv, "-http_port", -1);
    char *out_filename = find_char_arg(argc, argv, "-out_filename", 0);
    char *prefix = find_char_arg(argc, argv, "-prefix", 0);
    float thresh = find_float_arg(argc, argv, "-thresh", .24);
    float thresh = find_float_arg(argc, argv, "-thresh", .25);  // 0.24
    float hier_thresh = find_float_arg(argc, argv, "-hier", .5);
    int cam_index = find_int_arg(argc, argv, "-c", 0);
    int frame_skip = find_int_arg(argc, argv, "-s", 0);
    int num_of_clusters = find_int_arg(argc, argv, "-num_of_clusters", 5);
@@ -1112,7 +1119,7 @@
    if(weights)
        if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0;
    char *filename = (argc > 6) ? argv[6]: 0;
    if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, dont_show);
    if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show);
    else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show);
    else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights);
    else if(0==strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights);
@@ -1125,7 +1132,7 @@
        char **names = get_labels(name_list);
        if(filename)
            if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0;
        demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, out_filename,
        demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, frame_skip, prefix, out_filename,
            http_stream_port, dont_show);
    }
}
src/image.c
@@ -93,6 +93,23 @@
    return b;
}
image get_label_v3(image **characters, char *string, int size)
{
    size = size / 10;
    if (size > 7) size = 7;
    image label = make_empty_image(0, 0, 0);
    while (*string) {
        image l = characters[size][(int)*string];
        image n = tile_images(label, l, -size - 1 + (size + 1) / 2);
        free_image(label);
        label = n;
        ++string;
    }
    image b = border_image(label, label.h*.25);
    free_image(label);
    return b;
}
void draw_label(image a, int r, int c, image label, const float *rgb)
{
    int w = label.w;
@@ -183,6 +200,80 @@
    return alphabets;
}
void draw_detections_v3(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes)
{
    int i, j;
    for (i = 0; i < num; ++i) {
        char labelstr[4096] = { 0 };
        int class_id = -1;
        for (j = 0; j < classes; ++j) {
            if (dets[i].prob[j] > thresh) {
                if (class_id < 0) {
                    strcat(labelstr, names[j]);
                    class_id = j;
                }
                else {
                    strcat(labelstr, ", ");
                    strcat(labelstr, names[j]);
                }
                printf("%s: %.0f%%\n", names[j], dets[i].prob[j] * 100);
            }
        }
        if (class_id >= 0) {
            int width = im.h * .006;
            /*
            if(0){
            width = pow(prob, 1./2.)*10+1;
            alphabet = 0;
            }
            */
            //printf("%d %s: %.0f%%\n", i, names[class_id], prob*100);
            int offset = class_id * 123457 % classes;
            float red = get_color(2, offset, classes);
            float green = get_color(1, offset, classes);
            float blue = get_color(0, offset, classes);
            float rgb[3];
            //width = prob*20+2;
            rgb[0] = red;
            rgb[1] = green;
            rgb[2] = blue;
            box b = dets[i].bbox;
            //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
            int left = (b.x - b.w / 2.)*im.w;
            int right = (b.x + b.w / 2.)*im.w;
            int top = (b.y - b.h / 2.)*im.h;
            int bot = (b.y + b.h / 2.)*im.h;
            if (left < 0) left = 0;
            if (right > im.w - 1) right = im.w - 1;
            if (top < 0) top = 0;
            if (bot > im.h - 1) bot = im.h - 1;
            draw_box_width(im, left, top, right, bot, width, red, green, blue);
            if (alphabet) {
                image label = get_label_v3(alphabet, labelstr, (im.h*.03));
                draw_label(im, top + width, left, label, rgb);
                free_image(label);
            }
            if (dets[i].mask) {
                image mask = float_to_image(14, 14, 1, dets[i].mask);
                image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h);
                image tmask = threshold_image(resized_mask, .5);
                embed_image(tmask, im, left, top);
                free_image(mask);
                free_image(resized_mask);
                free_image(tmask);
            }
        }
    }
}
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes)
{
    int i;
@@ -245,6 +336,93 @@
}
#ifdef OPENCV
void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes)
{
    int i, j;
    if (!show_img) return;
    for (i = 0; i < num; ++i) {
        char labelstr[4096] = { 0 };
        int class_id = -1;
        for (j = 0; j < classes; ++j) {
            if (dets[i].prob[j] > thresh) {
                if (class_id < 0) {
                    strcat(labelstr, names[j]);
                    class_id = j;
                }
                else {
                    strcat(labelstr, ", ");
                    strcat(labelstr, names[j]);
                }
                printf("%s: %.0f%%\n", names[j], dets[i].prob[j] * 100);
            }
        }
        if (class_id >= 0) {
            int width = show_img->height * .006;
            /*
            if(0){
            width = pow(prob, 1./2.)*10+1;
            alphabet = 0;
            }
            */
            //printf("%d %s: %.0f%%\n", i, names[class_id], prob*100);
            int offset = class_id * 123457 % classes;
            float red = get_color(2, offset, classes);
            float green = get_color(1, offset, classes);
            float blue = get_color(0, offset, classes);
            float rgb[3];
            //width = prob*20+2;
            rgb[0] = red;
            rgb[1] = green;
            rgb[2] = blue;
            box b = dets[i].bbox;
            //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h);
            int left = (b.x - b.w / 2.)*show_img->width;
            int right = (b.x + b.w / 2.)*show_img->width;
            int top = (b.y - b.h / 2.)*show_img->height;
            int bot = (b.y + b.h / 2.)*show_img->height;
            if (left < 0) left = 0;
            if (right > show_img->width - 1) right = show_img->width - 1;
            if (top < 0) top = 0;
            if (bot > show_img->height - 1) bot = show_img->height - 1;
            float const font_size = show_img->height / 1000.F;
            CvPoint pt1, pt2, pt_text, pt_text_bg1, pt_text_bg2;
            pt1.x = left;
            pt1.y = top;
            pt2.x = right;
            pt2.y = bot;
            pt_text.x = left;
            pt_text.y = top - 12;
            pt_text_bg1.x = left;
            pt_text_bg1.y = top - (10 + 25 * font_size);
            pt_text_bg2.x = right;
            pt_text_bg2.y = top;
            CvScalar color;
            color.val[0] = red * 256;
            color.val[1] = green * 256;
            color.val[2] = blue * 256;
            cvRectangle(show_img, pt1, pt2, color, width, 8, 0);
            //printf("left=%d, right=%d, top=%d, bottom=%d, obj_id=%d, obj=%s \n", left, right, top, bot, class_id, names[class_id]);
            cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, width, 8, 0);
            cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, CV_FILLED, 8, 0);    // filled
            CvScalar black_color;
            black_color.val[0] = 0;
            CvFont font;
            cvInitFont(&font, CV_FONT_HERSHEY_SIMPLEX, font_size, font_size, 0, font_size * 3, 8);
            cvPutText(show_img, names[class_id], pt_text, &font, black_color);
        }
    }
}
void draw_detections_cv(IplImage* show_img, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes)
{
    int i;
src/image.h
@@ -23,6 +23,7 @@
void draw_label(image a, int r, int c, image label, const float *rgb);
void write_label(image a, int r, int c, image *characters, char *string, float *rgb);
void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **labels, int classes);
void draw_detections_v3(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes);
image image_distance(image a, image b);
void scale_image(image m, float s);
image crop_image(image im, int dx, int dy, int w, int h);
src/layer.h
@@ -33,7 +33,9 @@
    NETWORK,
    XNOR,
    REGION,
    YOLO,
    REORG,
    UPSAMPLE,
    REORG_OLD,
    BLANK
} LAYER_TYPE;
@@ -109,6 +111,9 @@
    int noadjust;
    int reorg;
    int log;
    int tanh;
    int *mask;
    int total;
    int adam;
    float B1;
@@ -133,7 +138,10 @@
    float class_scale;
    int bias_match;
    int random;
    float ignore_thresh;
    float truth_thresh;
    float thresh;
    float focus;
    int classfix;
    int absolute;
src/network.c
@@ -27,6 +27,7 @@
#include "dropout_layer.h"
#include "route_layer.h"
#include "shortcut_layer.h"
#include "yolo_layer.h"
int get_current_batch(network net)
{
@@ -499,6 +500,107 @@
    return out;
}
int num_detections(network *net, float thresh)
{
    int i;
    int s = 0;
    for (i = 0; i < net->n; ++i) {
        layer l = net->layers[i];
        if (l.type == YOLO) {
            s += yolo_num_detections(l, thresh);
        }
        if (l.type == DETECTION || l.type == REGION) {
            s += l.w*l.h*l.n;
        }
    }
    return s;
}
detection *make_network_boxes(network *net, float thresh, int *num)
{
    layer l = net->layers[net->n - 1];
    int i;
    int nboxes = num_detections(net, thresh);
    if (num) *num = nboxes;
    detection *dets = calloc(nboxes, sizeof(detection));
    for (i = 0; i < nboxes; ++i) {
        dets[i].prob = calloc(l.classes, sizeof(float));
        if (l.coords > 4) {
            dets[i].mask = calloc(l.coords - 4, sizeof(float));
        }
    }
    return dets;
}
void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter)
{
    box *boxes = calloc(l.w*l.h*l.n, sizeof(box));
    float **probs = calloc(l.w*l.h*l.n, sizeof(float *));
    int i, j;
    for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes + 1, sizeof(float));   // per-class probabilities
    get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map);
    for (j = 0; j < l.w*l.h*l.n; ++j) {
        dets[j].classes = l.classes;
        dets[j].bbox = boxes[j];
        dets[j].objectness = 1;
        for (i = 0; i < l.classes; ++i) dets[j].prob[i] = probs[j][i];
    }
    free(boxes);
    free_ptrs((void **)probs, l.w*l.h*l.n);
}
void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets, int letter)
{
    int j;
    for (j = 0; j < net->n; ++j) {
        layer l = net->layers[j];
        if (l.type == YOLO) {
            int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets, letter);
            dets += count;
        }
        if (l.type == REGION) {
            custom_get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets, letter);
            //get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets);
            dets += l.w*l.h*l.n;
        }
        if (l.type == DETECTION) {
            get_detection_detections(l, w, h, thresh, dets);
            dets += l.w*l.h*l.n;
        }
    }
}
detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter)
{
    detection *dets = make_network_boxes(net, thresh, num);
    fill_network_boxes(net, w, h, thresh, hier, map, relative, dets, letter);
    return dets;
}
void free_detections(detection *dets, int n)
{
    int i;
    for (i = 0; i < n; ++i) {
        free(dets[i].prob);
        if (dets[i].mask) free(dets[i].mask);
    }
    free(dets);
}
float *network_predict_image(network *net, image im)
{
    image imr = letterbox_image(im, net->w, net->h);
    set_batch_network(net, 1);
    float *p = network_predict(*net, imr.data);
    free_image(imr);
    return p;
}
int network_width(network *net) { return net->w; }
int network_height(network *net) { return net->h; }
matrix network_predict_data_multi(network net, data test, int n)
{
    int i,j,b,m;
src/network.h
@@ -132,6 +132,7 @@
void set_batch_network(network *net, int b);
int get_network_input_size(network net);
float get_network_cost(network net);
detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter);
int get_network_nuisance(network net);
int get_network_background(network net);
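
The declarations above, together with the new functions in network.c, form the detection API that detector.c and demo.c switch to in this commit. A minimal standalone sketch (not part of the patch) of how a caller might drive it: paths and thresholds are placeholders, and it assumes the do_nms_sort variant operating on detection* that box.c gains in this commit, plus the usual darknet helpers from parser.h and image.h.

#include "network.h"
#include "parser.h"
#include "image.h"
#include "box.h"

void detect_sketch(char *cfgfile, char *weightfile, char *filename)
{
    network net = parse_network_cfg(cfgfile);      // builds YOLO/UPSAMPLE layers via parser.c
    load_weights(&net, weightfile);

    image im = load_image_color(filename, 0, 0);
    network_predict_image(&net, im);               // letterboxes the image and runs one forward pass

    int nboxes = 0;
    detection *dets = get_network_boxes(&net, im.w, im.h, .25, .5, 0, 1, &nboxes, 1);
    layer l = net.layers[net.n - 1];
    do_nms_sort(dets, nboxes, l.classes, .45);     // NMS over the detection array

    // ... draw or print dets[0 .. nboxes-1] here ...

    free_detections(dets, nboxes);                 // frees each prob/mask array and the list itself
    free_image(im);
}
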
src/parser.c
@@ -30,6 +30,8 @@
#include "shortcut_layer.h"
#include "softmax_layer.h"
#include "utils.h"
#include "upsample_layer.h"
#include "yolo_layer.h"
#include <stdint.h>
typedef struct{
@@ -47,6 +49,7 @@
    if (strcmp(type, "[cost]")==0) return COST;
    if (strcmp(type, "[detection]")==0) return DETECTION;
    if (strcmp(type, "[region]")==0) return REGION;
    if (strcmp(type, "[yolo]") == 0) return YOLO;
    if (strcmp(type, "[local]")==0) return LOCAL;
    if (strcmp(type, "[conv]")==0
            || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
@@ -71,6 +74,7 @@
    if (strcmp(type, "[soft]")==0
            || strcmp(type, "[softmax]")==0) return SOFTMAX;
    if (strcmp(type, "[route]")==0) return ROUTE;
    if (strcmp(type, "[upsample]") == 0) return UPSAMPLE;
    return BLANK;
}
@@ -235,6 +239,65 @@
    return layer;
}
int *parse_yolo_mask(char *a, int *num)
{
    int *mask = 0;
    if (a) {
        int len = strlen(a);
        int n = 1;
        int i;
        for (i = 0; i < len; ++i) {
            if (a[i] == ',') ++n;
        }
        mask = calloc(n, sizeof(int));
        for (i = 0; i < n; ++i) {
            int val = atoi(a);
            mask[i] = val;
            a = strchr(a, ',') + 1;
        }
        *num = n;
    }
    return mask;
}
layer parse_yolo(list *options, size_params params)
{
    int classes = option_find_int(options, "classes", 20);
    int total = option_find_int(options, "num", 1);
    int num = total;
    char *a = option_find_str(options, "mask", 0);
    int *mask = parse_yolo_mask(a, &num);
    layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes);
    assert(l.outputs == params.inputs);
    l.max_boxes = option_find_int_quiet(options, "max", 90);
    l.jitter = option_find_float(options, "jitter", .2);
    l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
    l.truth_thresh = option_find_float(options, "truth_thresh", 1);
    l.random = option_find_int_quiet(options, "random", 0);
    char *map_file = option_find_str(options, "map", 0);
    if (map_file) l.map = read_map(map_file);
    a = option_find_str(options, "anchors", 0);
    if (a) {
        int len = strlen(a);
        int n = 1;
        int i;
        for (i = 0; i < len; ++i) {
            if (a[i] == ',') ++n;
        }
        for (i = 0; i < n; ++i) {
            float bias = atof(a);
            l.biases[i] = bias;
            a = strchr(a, ',') + 1;
        }
    }
    return l;
}
layer parse_region(list *options, size_params params)
{
    int coords = option_find_int(options, "coords", 4);
@@ -469,6 +532,15 @@
    return l;
}
layer parse_upsample(list *options, size_params params, network net)
{
    int stride = option_find_int(options, "stride", 2);
    layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride);
    l.scale = option_find_float_quiet(options, "scale", 1);
    return l;
}
route_layer parse_route(list *options, size_params params, network net)
{
    char *l = option_find(options, "layers");   
@@ -665,6 +737,8 @@
            l = parse_cost(options, params);
        }else if(lt == REGION){
            l = parse_region(options, params);
        }else if (lt == YOLO) {
            l = parse_yolo(options, params);
        }else if(lt == DETECTION){
            l = parse_detection(options, params);
        }else if(lt == SOFTMAX){
@@ -684,6 +758,8 @@
            l = parse_avgpool(options, params);
        }else if(lt == ROUTE){
            l = parse_route(options, params, net);
        }else if (lt == UPSAMPLE) {
            l = parse_upsample(options, params, net);
        }else if(lt == SHORTCUT){
            l = parse_shortcut(options, params, net);
        }else if(lt == DROPOUT){
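
Both parse_yolo_mask above and the anchors loop in parse_yolo walk a comma-separated option string the same way: count commas to size the array, then read a value and jump past the next comma with strchr. A standalone sketch of that pattern (parse_int_list is a hypothetical name, not repo code), slightly hardened so the pointer is never advanced past a missing final comma.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static int *parse_int_list(const char *s, int *num)
{
    int n = 1, i, len = (int)strlen(s);
    for (i = 0; i < len; ++i) if (s[i] == ',') ++n;   // one value per comma, plus one
    int *vals = calloc(n, sizeof(int));
    for (i = 0; i < n; ++i) {
        vals[i] = atoi(s);                            // parse the value at the current position
        const char *next = strchr(s, ',');
        if (next) s = next + 1;                       // advance past the comma (none after the last value)
    }
    *num = n;
    return vals;
}

int main(void)
{
    int n = 0, i;
    int *mask = parse_int_list("3,4,5", &n);          // e.g. the mask of yolov3.cfg's middle [yolo] head
    for (i = 0; i < n; ++i) printf("%d ", mask[i]);   // prints: 3 4 5
    printf("\n");
    free(mask);
    return 0;
}
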
src/region_layer.c
@@ -130,12 +130,14 @@
    } else {        
        // Focal loss
        if (focal_loss) {
            // Focal Loss for Dense Object Detection: http://blog.csdn.net/linmingan/article/details/77885832
            // Focal Loss
            float alpha = 0.5;  // 0.25 or 0.5
            //float gamma = 2;  // hardcoded in many places of the grad-formula 
            int ti = index + class_id;
            float grad = -2 * (1 - output[ti])*logf(fmaxf(output[ti], 0.0000001))*output[ti] + (1 - output[ti])*(1 - output[ti]);
            float pt = output[ti] + 0.000000000000001F;
            //float grad = -(1 - pt) * (2 * pt*logf(pt) + pt - 1);  // http://blog.csdn.net/linmingan/article/details/77885832
            float grad = (1 - pt) * (2 * pt*logf(pt) + pt - 1);     // https://github.com/unsky/focal-loss
            for (n = 0; n < classes; ++n) {
                delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]);
@@ -165,6 +167,13 @@
    return (x != x);
}
static int entry_index(layer l, int batch, int location, int entry)
{
    int n = location / (l.w*l.h);
    int loc = location % (l.w*l.h);
    return batch*l.outputs + n*l.w*l.h*(l.coords + l.classes + 1) + entry*l.w*l.h + loc;
}
void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output);
void forward_region_layer(const region_layer l, network_state state)
{
@@ -454,3 +463,109 @@
}
#endif
void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative)
{
    int i;
    int new_w = 0;
    int new_h = 0;
    if (((float)netw / w) < ((float)neth / h)) {
        new_w = netw;
        new_h = (h * netw) / w;
    }
    else {
        new_h = neth;
        new_w = (w * neth) / h;
    }
    for (i = 0; i < n; ++i) {
        box b = dets[i].bbox;
        b.x = (b.x - (netw - new_w) / 2. / netw) / ((float)new_w / netw);
        b.y = (b.y - (neth - new_h) / 2. / neth) / ((float)new_h / neth);
        b.w *= (float)netw / new_w;
        b.h *= (float)neth / new_h;
        if (!relative) {
            b.x *= w;
            b.w *= w;
            b.y *= h;
            b.h *= h;
        }
        dets[i].bbox = b;
    }
}
void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets)
{
    int i, j, n, z;
    float *predictions = l.output;
    if (l.batch == 2) {
        float *flip = l.output + l.outputs;
        for (j = 0; j < l.h; ++j) {
            for (i = 0; i < l.w / 2; ++i) {
                for (n = 0; n < l.n; ++n) {
                    for (z = 0; z < l.classes + l.coords + 1; ++z) {
                        int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i;
                        int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1);
                        float swap = flip[i1];
                        flip[i1] = flip[i2];
                        flip[i2] = swap;
                        if (z == 0) {
                            flip[i1] = -flip[i1];
                            flip[i2] = -flip[i2];
                        }
                    }
                }
            }
        }
        for (i = 0; i < l.outputs; ++i) {
            l.output[i] = (l.output[i] + flip[i]) / 2.;
        }
    }
    for (i = 0; i < l.w*l.h; ++i) {
        int row = i / l.w;
        int col = i % l.w;
        for (n = 0; n < l.n; ++n) {
            int index = n*l.w*l.h + i;
            for (j = 0; j < l.classes; ++j) {
                dets[index].prob[j] = 0;
            }
            int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords);
            int box_index = entry_index(l, 0, n*l.w*l.h + i, 0);
            int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4);
            float scale = l.background ? 1 : predictions[obj_index];
            dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h);
            dets[index].objectness = scale > thresh ? scale : 0;
            if (dets[index].mask) {
                for (j = 0; j < l.coords - 4; ++j) {
                    dets[index].mask[j] = l.output[mask_index + j*l.w*l.h];
                }
            }
            int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background);
            if (l.softmax_tree) {
                hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h);
                if (map) {
                    for (j = 0; j < 200; ++j) {
                        int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]);
                        float prob = scale*predictions[class_index];
                        dets[index].prob[j] = (prob > thresh) ? prob : 0;
                    }
                }
                else {
                    int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h);
                    dets[index].prob[j] = (scale > thresh) ? scale : 0;
                }
            }
            else {
                if (dets[index].objectness) {
                    for (j = 0; j < l.classes; ++j) {
                        int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j);
                        float prob = scale*predictions[class_index];
                        dets[index].prob[j] = (prob > thresh) ? prob : 0;
                    }
                }
            }
        }
    }
    correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative);
}
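
correct_region_boxes (and correct_yolo_boxes in yolo_layer.c below) undoes the letterbox transform: the network saw the image resized to new_w x new_h inside a netw x neth canvas, so the padding offset is subtracted and the coordinates rescaled back to the original image. A standalone worked example, illustration only with arbitrary values: a 640x480 image in a 416x416 net gives new_w = 416, new_h = 312, and a box centred in the padded canvas maps back to the centre of the original image.

#include <stdio.h>

int main(void)
{
    int w = 640, h = 480;            // original image
    int netw = 416, neth = 416;      // network input
    int new_w, new_h;
    if (((float)netw / w) < ((float)neth / h)) { new_w = netw; new_h = (h * netw) / w; }
    else                                       { new_h = neth; new_w = (w * neth) / h; }
    // new_w = 416, new_h = 312: the image fills the width, leaving 52 px of padding top and bottom

    float x = 0.5f, y = 0.5f, bw = 0.25f, bh = 0.25f;                 // box relative to the 416x416 canvas
    x  = (x - (netw - new_w) / 2.f / netw) / ((float)new_w / netw);   // 0.5 (no horizontal padding)
    y  = (y - (neth - new_h) / 2.f / neth) / ((float)new_h / neth);   // (0.5 - 0.125) / 0.75 = 0.5
    bw *= (float)netw / new_w;                                        // 0.25
    bh *= (float)neth / new_h;                                        // 0.25 * 416/312 = 0.333
    printf("x=%.3f y=%.3f w=%.3f h=%.3f\n", x, y, bw, bh);            // now relative to the 640x480 image
    return 0;
}
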
src/region_layer.h
@@ -11,6 +11,7 @@
void backward_region_layer(const region_layer l, network_state state);
void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map);
void resize_region_layer(layer *l, int w, int h);
void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets);
#ifdef GPU
void forward_region_layer_gpu(const region_layer l, network_state state);
src/tree.c
@@ -50,6 +50,38 @@
    }
}
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride)
{
    float p = 1;
    int group = 0;
    int i;
    while (1) {
        float max = 0;
        int max_i = 0;
        for (i = 0; i < hier->group_size[group]; ++i) {
            int index = i + hier->group_offset[group];
            float val = predictions[(i + hier->group_offset[group])*stride];
            if (val > max) {
                max_i = index;
                max = val;
            }
        }
        if (p*max > thresh) {
            p = p*max;
            group = hier->child[max_i];
            if (hier->child[max_i] < 0) return max_i;
        }
        else if (group == 0) {
            return max_i;
        }
        else {
            return hier->parent[hier->group_offset[group]];
        }
    }
    return 0;
}
tree *read_tree(char *filename)
{
    tree t = {0};
src/tree.h
@@ -5,6 +5,7 @@
    int *leaf;
    int n;
    int *parent;
    int *child;
    int *group;
    char **name;
@@ -14,6 +15,7 @@
} tree;
tree *read_tree(char *filename);
int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride);
void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves);
void change_leaves(tree *t, char *leaf_list);
float get_hierarchy_probability(float *x, tree *hier, int c);
src/upsample_layer.c
New file
@@ -0,0 +1,106 @@
#include "upsample_layer.h"
#include "cuda.h"
#include "blas.h"
#include <stdio.h>
layer make_upsample_layer(int batch, int w, int h, int c, int stride)
{
    layer l = {0};
    l.type = UPSAMPLE;
    l.batch = batch;
    l.w = w;
    l.h = h;
    l.c = c;
    l.out_w = w*stride;
    l.out_h = h*stride;
    l.out_c = c;
    if(stride < 0){
        stride = -stride;
        l.reverse=1;
        l.out_w = w/stride;
        l.out_h = h/stride;
    }
    l.stride = stride;
    l.outputs = l.out_w*l.out_h*l.out_c;
    l.inputs = l.w*l.h*l.c;
    l.delta =  calloc(l.outputs*batch, sizeof(float));
    l.output = calloc(l.outputs*batch, sizeof(float));
    l.forward = forward_upsample_layer;
    l.backward = backward_upsample_layer;
    #ifdef GPU
    l.forward_gpu = forward_upsample_layer_gpu;
    l.backward_gpu = backward_upsample_layer_gpu;
    l.delta_gpu =  cuda_make_array(l.delta, l.outputs*batch);
    l.output_gpu = cuda_make_array(l.output, l.outputs*batch);
    #endif
    if(l.reverse) fprintf(stderr, "downsample         %2dx  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c);
    else fprintf(stderr, "upsample           %2dx  %4d x%4d x%4d   ->  %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c);
    return l;
}
void resize_upsample_layer(layer *l, int w, int h)
{
    l->w = w;
    l->h = h;
    l->out_w = w*l->stride;
    l->out_h = h*l->stride;
    if(l->reverse){
        l->out_w = w/l->stride;
        l->out_h = h/l->stride;
    }
    l->outputs = l->out_w*l->out_h*l->out_c;
    l->inputs = l->h*l->w*l->c;
    l->delta =  realloc(l->delta, l->outputs*l->batch*sizeof(float));
    l->output = realloc(l->output, l->outputs*l->batch*sizeof(float));
#ifdef GPU
    cuda_free(l->output_gpu);
    cuda_free(l->delta_gpu);
    l->output_gpu  = cuda_make_array(l->output, l->outputs*l->batch);
    l->delta_gpu   = cuda_make_array(l->delta,  l->outputs*l->batch);
#endif
}
void forward_upsample_layer(const layer l, network_state net)
{
    fill_cpu(l.outputs*l.batch, 0, l.output, 1);
    if(l.reverse){
        upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input);
    }else{
        upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output);
    }
}
void backward_upsample_layer(const layer l, network_state state)
{
    if(l.reverse){
        upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, state.delta);
    }else{
        upsample_cpu(state.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta);
    }
}
#ifdef GPU
void forward_upsample_layer_gpu(const layer l, network_state state)
{
    fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1);
    if(l.reverse){
        upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, state.input);
    }else{
        upsample_gpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu);
    }
}
void backward_upsample_layer_gpu(const layer l, network_state state)
{
    if(l.reverse){
        upsample_gpu(l.delta_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, state.delta);
    }else{
        upsample_gpu(state.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta_gpu);
    }
}
#endif
src/upsample_layer.h
New file
@@ -0,0 +1,17 @@
#ifndef UPSAMPLE_LAYER_H
#define UPSAMPLE_LAYER_H
#include "cuda.h"
#include "layer.h"
#include "network.h"
layer make_upsample_layer(int batch, int w, int h, int c, int stride);
void forward_upsample_layer(const layer l, network_state state);
void backward_upsample_layer(const layer l, network_state state);
void resize_upsample_layer(layer *l, int w, int h);
#ifdef GPU
void forward_upsample_layer_gpu(const layer l, network_state state);
void backward_upsample_layer_gpu(const layer l, network_state state);
#endif
#endif
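
The forward/backward calls above delegate the actual work to upsample_cpu and upsample_gpu, which this commit adds to blas.c and blas_kernels.cu (not excerpted here). For the call sites to make sense, the routine has to do plain nearest-neighbour replication with an accumulate mode for the backward/reverse path; the following is a standalone sketch consistent with that contract, not a copy of the repo's implementation.

// in  : batch x c x h x w                       (the small tensor)
// out : batch x c x (h*stride) x (w*stride)     (the large tensor)
// forward = 1: out = scale * nearest-neighbour upsample of in
// forward = 0: in += scale * out, summing each stride x stride block (gradient path)
static void upsample_cpu_sketch(float *in, int w, int h, int c, int batch,
                                int stride, int forward, float scale, float *out)
{
    int i, j, k, b;
    for (b = 0; b < batch; ++b) {
        for (k = 0; k < c; ++k) {
            for (j = 0; j < h*stride; ++j) {
                for (i = 0; i < w*stride; ++i) {
                    int in_index  = b*w*h*c + k*w*h + (j/stride)*w + (i/stride);
                    int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i;
                    if (forward) out[out_index]  = scale * in[in_index];
                    else         in[in_index]   += scale * out[out_index];
                }
            }
        }
    }
}
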
src/utils.c
@@ -545,6 +545,15 @@
    return max_i;
}
int int_index(int *a, int val, int n)
{
    int i;
    for (i = 0; i < n; ++i) {
        if (a[i] == val) return i;
    }
    return -1;
}
int rand_int(int min, int max)
{
    if (max < min){
src/utils.h
@@ -66,6 +66,7 @@
unsigned int random_gen();
float random_float();
float rand_uniform_strong(float min, float max);
int int_index(int *a, int val, int n);
#endif
src/yolo_layer.c
New file
@@ -0,0 +1,381 @@
#include "yolo_layer.h"
#include "activations.h"
#include "blas.h"
#include "box.h"
#include "cuda.h"
#include "utils.h"
#include <stdio.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes)
{
    int i;
    layer l = {0};
    l.type = YOLO;
    l.n = n;
    l.total = total;
    l.batch = batch;
    l.h = h;
    l.w = w;
    l.c = n*(classes + 4 + 1);
    l.out_w = l.w;
    l.out_h = l.h;
    l.out_c = l.c;
    l.classes = classes;
    l.cost = calloc(1, sizeof(float));
    l.biases = calloc(total*2, sizeof(float));
    if(mask) l.mask = mask;
    else{
        l.mask = calloc(n, sizeof(int));
        for(i = 0; i < n; ++i){
            l.mask[i] = i;
        }
    }
    l.bias_updates = calloc(n*2, sizeof(float));
    l.outputs = h*w*n*(classes + 4 + 1);
    l.inputs = l.outputs;
    l.truths = 90*(4 + 1);
    l.delta = calloc(batch*l.outputs, sizeof(float));
    l.output = calloc(batch*l.outputs, sizeof(float));
    for(i = 0; i < total*2; ++i){
        l.biases[i] = .5;
    }
    l.forward = forward_yolo_layer;
    l.backward = backward_yolo_layer;
#ifdef GPU
    l.forward_gpu = forward_yolo_layer_gpu;
    l.backward_gpu = backward_yolo_layer_gpu;
    l.output_gpu = cuda_make_array(l.output, batch*l.outputs);
    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
#endif
    fprintf(stderr, "detection\n");
    srand(0);
    return l;
}
void resize_yolo_layer(layer *l, int w, int h)
{
    l->w = w;
    l->h = h;
    l->outputs = h*w*l->n*(l->classes + 4 + 1);
    l->inputs = l->outputs;
    l->output = realloc(l->output, l->batch*l->outputs*sizeof(float));
    l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float));
#ifdef GPU
    cuda_free(l->delta_gpu);
    cuda_free(l->output_gpu);
    l->delta_gpu =     cuda_make_array(l->delta, l->batch*l->outputs);
    l->output_gpu =    cuda_make_array(l->output, l->batch*l->outputs);
#endif
}
box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride)
{
    box b;
    b.x = (i + x[index + 0*stride]) / lw;
    b.y = (j + x[index + 1*stride]) / lh;
    b.w = exp(x[index + 2*stride]) * biases[2*n]   / w;
    b.h = exp(x[index + 3*stride]) * biases[2*n+1] / h;
    return b;
}
float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride)
{
    box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride);
    float iou = box_iou(pred, truth);
    float tx = (truth.x*lw - i);
    float ty = (truth.y*lh - j);
    float tw = log(truth.w*w / biases[2*n]);
    float th = log(truth.h*h / biases[2*n + 1]);
    delta[index + 0*stride] = scale * (tx - x[index + 0*stride]);
    delta[index + 1*stride] = scale * (ty - x[index + 1*stride]);
    delta[index + 2*stride] = scale * (tw - x[index + 2*stride]);
    delta[index + 3*stride] = scale * (th - x[index + 3*stride]);
    return iou;
}
void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat)
{
    int n;
    if (delta[index]){
        delta[index + stride*class] = 1 - output[index + stride*class];
        if(avg_cat) *avg_cat += output[index + stride*class];
        return;
    }
    for(n = 0; n < classes; ++n){
        delta[index + stride*n] = ((n == class)?1 : 0) - output[index + stride*n];
        if(n == class && avg_cat) *avg_cat += output[index + stride*n];
    }
}
static int entry_index(layer l, int batch, int location, int entry)
{
    int n =   location / (l.w*l.h);
    int loc = location % (l.w*l.h);
    return batch*l.outputs + n*l.w*l.h*(4+l.classes+1) + entry*l.w*l.h + loc;
}
void forward_yolo_layer(const layer l, network_state state)
{
    int i,j,b,t,n;
    memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float));
#ifndef GPU
    for (b = 0; b < l.batch; ++b){
        for(n = 0; n < l.n; ++n){
            int index = entry_index(l, b, n*l.w*l.h, 0);
            activate_array(l.output + index, 2*l.w*l.h, LOGISTIC);
            index = entry_index(l, b, n*l.w*l.h, 4);
            activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC);
        }
    }
#endif
    memset(l.delta, 0, l.outputs * l.batch * sizeof(float));
    if(!state.train) return;
    float avg_iou = 0;
    float recall = 0;
    float recall75 = 0;
    float avg_cat = 0;
    float avg_obj = 0;
    float avg_anyobj = 0;
    int count = 0;
    int class_count = 0;
    *(l.cost) = 0;
    for (b = 0; b < l.batch; ++b) {
        for (j = 0; j < l.h; ++j) {
            for (i = 0; i < l.w; ++i) {
                for (n = 0; n < l.n; ++n) {
                    int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0);
                    box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h);
                    float best_iou = 0;
                    int best_t = 0;
                    for(t = 0; t < l.max_boxes; ++t){
                        box truth = float_to_box(state.truth + t*(4 + 1) + b*l.truths, 1);
                        if(!truth.x) break;
                        float iou = box_iou(pred, truth);
                        if (iou > best_iou) {
                            best_iou = iou;
                            best_t = t;
                        }
                    }
                    int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4);
                    avg_anyobj += l.output[obj_index];
                    l.delta[obj_index] = 0 - l.output[obj_index];
                    if (best_iou > l.ignore_thresh) {
                        l.delta[obj_index] = 0;
                    }
                    if (best_iou > l.truth_thresh) {
                        l.delta[obj_index] = 1 - l.output[obj_index];
                        int class = state.truth[best_t*(4 + 1) + b*l.truths + 4];
                        if (l.map) class = l.map[class];
                        int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1);
                        delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0);
                        box truth = float_to_box(state.truth + best_t*(4 + 1) + b*l.truths, 1);
                        delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h);
                    }
                }
            }
        }
        for(t = 0; t < l.max_boxes; ++t){
            box truth = float_to_box(state.truth + t*(4 + 1) + b*l.truths, 1);
            if(!truth.x) break;
            float best_iou = 0;
            int best_n = 0;
            i = (truth.x * l.w);
            j = (truth.y * l.h);
            box truth_shift = truth;
            truth_shift.x = truth_shift.y = 0;
            for(n = 0; n < l.total; ++n){
                box pred = {0};
                pred.w = l.biases[2*n]/ state.net.w;
                pred.h = l.biases[2*n+1]/ state.net.h;
                float iou = box_iou(pred, truth_shift);
                if (iou > best_iou){
                    best_iou = iou;
                    best_n = n;
                }
            }
            int mask_n = int_index(l.mask, best_n, l.n);
            if(mask_n >= 0){
                int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0);
                float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h);
                int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4);
                avg_obj += l.output[obj_index];
                l.delta[obj_index] = 1 - l.output[obj_index];
                int class = state.truth[t*(4 + 1) + b*l.truths + 4];
                if (l.map) class = l.map[class];
                int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1);
                delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat);
                ++count;
                ++class_count;
                if(iou > .5) recall += 1;
                if(iou > .75) recall75 += 1;
                avg_iou += iou;
            }
        }
    }
    *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2);
    printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f,  count: %d\n", state.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count);
}
void backward_yolo_layer(const layer l, network_state state)
{
   axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1);
}
void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter)
{
    int i;
    int new_w=0;
    int new_h=0;
    if (letter) {
        if (((float)netw / w) < ((float)neth / h)) {
            new_w = netw;
            new_h = (h * netw) / w;
        }
        else {
            new_h = neth;
            new_w = (w * neth) / h;
        }
    }
    else {
        new_w = netw;
        new_h = neth;
    }
    for (i = 0; i < n; ++i){
        box b = dets[i].bbox;
        b.x =  (b.x - (netw - new_w)/2./netw) / ((float)new_w/netw);
        b.y =  (b.y - (neth - new_h)/2./neth) / ((float)new_h/neth);
        b.w *= (float)netw/new_w;
        b.h *= (float)neth/new_h;
        if(!relative){
            b.x *= w;
            b.w *= w;
            b.y *= h;
            b.h *= h;
        }
        dets[i].bbox = b;
    }
}
int yolo_num_detections(layer l, float thresh)
{
    int i, n;
    int count = 0;
    for (i = 0; i < l.w*l.h; ++i){
        for(n = 0; n < l.n; ++n){
            int obj_index  = entry_index(l, 0, n*l.w*l.h + i, 4);
            if(l.output[obj_index] > thresh){
                ++count;
            }
        }
    }
    return count;
}
void avg_flipped_yolo(layer l)
{
    int i,j,n,z;
    float *flip = l.output + l.outputs;
    for (j = 0; j < l.h; ++j) {
        for (i = 0; i < l.w/2; ++i) {
            for (n = 0; n < l.n; ++n) {
                for(z = 0; z < l.classes + 4 + 1; ++z){
                    int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i;
                    int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1);
                    float swap = flip[i1];
                    flip[i1] = flip[i2];
                    flip[i2] = swap;
                    if(z == 0){
                        flip[i1] = -flip[i1];
                        flip[i2] = -flip[i2];
                    }
                }
            }
        }
    }
    for(i = 0; i < l.outputs; ++i){
        l.output[i] = (l.output[i] + flip[i])/2.;
    }
}
int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter)
{
    int i,j,n;
    float *predictions = l.output;
    if (l.batch == 2) avg_flipped_yolo(l);
    int count = 0;
    for (i = 0; i < l.w*l.h; ++i){
        int row = i / l.w;
        int col = i % l.w;
        for(n = 0; n < l.n; ++n){
            int obj_index  = entry_index(l, 0, n*l.w*l.h + i, 4);
            float objectness = predictions[obj_index];
            if(objectness <= thresh) continue;
            int box_index  = entry_index(l, 0, n*l.w*l.h + i, 0);
            dets[count].bbox = get_yolo_box(predictions, l.biases, l.mask[n], box_index, col, row, l.w, l.h, netw, neth, l.w*l.h);
            dets[count].objectness = objectness;
            dets[count].classes = l.classes;
            for(j = 0; j < l.classes; ++j){
                int class_index = entry_index(l, 0, n*l.w*l.h + i, 4 + 1 + j);
                float prob = objectness*predictions[class_index];
                dets[count].prob[j] = (prob > thresh) ? prob : 0;
            }
            ++count;
        }
    }
    correct_yolo_boxes(dets, count, w, h, netw, neth, relative, letter);
    return count;
}
#ifdef GPU
void forward_yolo_layer_gpu(const layer l, network_state state)
{
    copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
    int b, n;
    for (b = 0; b < l.batch; ++b){
        for(n = 0; n < l.n; ++n){
            int index = entry_index(l, b, n*l.w*l.h, 0);
            activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC);
            index = entry_index(l, b, n*l.w*l.h, 4);
            activate_array_ongpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC);
        }
    }
    if(!state.train || l.onlyforward){
        cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs);
        return;
    }
    cuda_pull_array(l.output_gpu, state.input, l.batch*l.inputs);
    forward_yolo_layer(l, state);
    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs);
}
void backward_yolo_layer_gpu(const layer l, network_state state)
{
    axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1);
}
#endif
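
entry_index encodes the layout of a YOLO head's output: for each of the l.n masked anchors, the w*h feature map is stored plane by plane in the order tx, ty, tw, th, objectness, then one plane per class. That is why forward_yolo_layer applies LOGISTIC to entries 0-1 and 4..4+classes, and why get_yolo_detections reads objectness at entry 4. A standalone sketch (entry_index_sketch is a local copy of the formula, not repo code) for one 13x13 yolov3 head with 3 masked anchors and 80 classes:

#include <stdio.h>

// Same formula as entry_index() above, with the layer fields passed explicitly.
static int entry_index_sketch(int w, int h, int classes, int outputs,
                              int batch, int location, int entry)
{
    int n   = location / (w*h);     // which masked anchor
    int loc = location % (w*h);     // which grid cell
    return batch*outputs + n*w*h*(4 + classes + 1) + entry*w*h + loc;
}

int main(void)
{
    int w = 13, h = 13, n = 3, classes = 80;     // coarsest yolov3 head at 416x416 input
    int outputs = w*h*n*(classes + 4 + 1);       // 13*13*3*85 = 43095 floats per image

    int cell   = 5*w + 7;                        // grid cell at row 5, column 7
    int anchor = 1;                              // second anchor of this head's mask
    int obj  = entry_index_sketch(w, h, classes, outputs, 0, anchor*w*h + cell, 4);
    int cls0 = entry_index_sketch(w, h, classes, outputs, 0, anchor*w*h + cell, 4 + 1);
    printf("objectness at %d, first class score at %d\n", obj, cls0);   // 15113 and 15282
    return 0;
}
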
src/yolo_layer.h
New file
@@ -0,0 +1,20 @@
#ifndef YOLO_LAYER_H
#define YOLO_LAYER_H
//#include "darknet.h"
#include "layer.h"
#include "network.h"
layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes);
void forward_yolo_layer(const layer l, network_state state);
void backward_yolo_layer(const layer l, network_state state);
void resize_yolo_layer(layer *l, int w, int h);
int yolo_num_detections(layer l, float thresh);
int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter);
#ifdef GPU
void forward_yolo_layer_gpu(const layer l, network_state state);
void backward_yolo_layer_gpu(const layer l, network_state state);
#endif
#endif
video_yolov3.sh
New file
@@ -0,0 +1,6 @@
./darknet detector demo ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights test50.mp4 -i 0 -thresh 0.25