27 files modified
10 files added
| | |
| | | endif |
| | | endif |
| | | |
| | | OBJ=http_stream.o gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o |
| | | OBJ=http_stream.o gemm.o utils.o cuda.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o detector.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o demo.o tag.o cifar.o go.o batchnorm_layer.o art.o region_layer.o reorg_layer.o reorg_old_layer.o super.o voxel.o tree.o yolo_layer.o upsample_layer.o |
| | | ifeq ($(GPU), 1) |
| | | LDFLAGS+= -lstdc++ |
| | | OBJ+=convolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o network_kernels.o avgpool_layer_kernels.o |
| | |
| | | <ClCompile Include="..\..\src\swag.c" /> |
| | | <ClCompile Include="..\..\src\tag.c" /> |
| | | <ClCompile Include="..\..\src\tree.c" /> |
| | | <ClCompile Include="..\..\src\upsample_layer.c" /> |
| | | <ClCompile Include="..\..\src\utils.c" /> |
| | | <ClCompile Include="..\..\src\voxel.c" /> |
| | | <ClCompile Include="..\..\src\writing.c" /> |
| | | <ClCompile Include="..\..\src\yolo.c" /> |
| | | <ClCompile Include="..\..\src\yolo_layer.c" /> |
| | | </ItemGroup> |
| | | <ItemGroup> |
| | | <ClInclude Include="..\..\src\activations.h" /> |
| | |
| | | <ClInclude Include="..\..\src\stb_image_write.h" /> |
| | | <ClInclude Include="..\..\src\tree.h" /> |
| | | <ClInclude Include="..\..\src\unistd.h" /> |
| | | <ClInclude Include="..\..\src\upsample_layer.h" /> |
| | | <ClInclude Include="..\..\src\utils.h" /> |
| | | <ClInclude Include="..\..\src\yolo_layer.h" /> |
| | | </ItemGroup> |
| | | <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
| | | <ImportGroup Label="ExtensionTargets"> |
| | |
| | | <ClCompile Include="..\..\src\swag.c" /> |
| | | <ClCompile Include="..\..\src\tag.c" /> |
| | | <ClCompile Include="..\..\src\tree.c" /> |
| | | <ClCompile Include="..\..\src\upsample_layer.c" /> |
| | | <ClCompile Include="..\..\src\utils.c" /> |
| | | <ClCompile Include="..\..\src\voxel.c" /> |
| | | <ClCompile Include="..\..\src\writing.c" /> |
| | | <ClCompile Include="..\..\src\yolo.c" /> |
| | | <ClCompile Include="..\..\src\yolo_layer.c" /> |
| | | </ItemGroup> |
| | | <ItemGroup> |
| | | <ClInclude Include="..\..\src\activations.h" /> |
| | |
| | | <ClInclude Include="..\..\src\stb_image_write.h" /> |
| | | <ClInclude Include="..\..\src\tree.h" /> |
| | | <ClInclude Include="..\..\src\unistd.h" /> |
| | | <ClInclude Include="..\..\src\upsample_layer.h" /> |
| | | <ClInclude Include="..\..\src\utils.h" /> |
| | | <ClInclude Include="..\..\src\yolo_layer.h" /> |
| | | </ItemGroup> |
| | | <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
| | | <ImportGroup Label="ExtensionTargets" /> |
| New file |
| | |
| | | [net] |
| | | # Testing |
| | | batch=1 |
| | | subdivisions=1 |
| | | # Training |
| | | # batch=64 |
| | | # subdivisions=16 |
| | | width=416 |
| | | height=416 |
| | | channels=3 |
| | | momentum=0.9 |
| | | decay=0.0005 |
| | | angle=0 |
| | | saturation = 1.5 |
| | | exposure = 1.5 |
| | | hue=.1 |
| | | |
| | | learning_rate=0.001 |
| | | burn_in=1000 |
| | | max_batches = 500200 |
| | | policy=steps |
| | | steps=400000,450000 |
| | | scales=.1,.1 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=32 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=32 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | ###################### |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=1024 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=1024 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=1024 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | filters=255 |
| | | activation=linear |
| | | |
| | | |
| | | [yolo] |
| | | mask = 6,7,8 |
| | | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 |
| | | classes=80 |
| | | num=9 |
| | | jitter=.3 |
| | | ignore_thresh = .5 |
| | | truth_thresh = 1 |
| | | random=1 |
| | | |
| | | |
| | | [route] |
| | | layers = -4 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [upsample] |
| | | stride=2 |
| | | |
| | | [route] |
| | | layers = -1, 61 |
| | | |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=512 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=512 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=512 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | filters=255 |
| | | activation=linear |
| | | |
| | | |
| | | [yolo] |
| | | mask = 3,4,5 |
| | | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 |
| | | classes=80 |
| | | num=9 |
| | | jitter=.3 |
| | | ignore_thresh = .5 |
| | | truth_thresh = 1 |
| | | random=1 |
| | | |
| | | |
| | | |
| | | [route] |
| | | layers = -4 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [upsample] |
| | | stride=2 |
| | | |
| | | [route] |
| | | layers = -1, 36 |
| | | |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=256 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=256 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=256 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | filters=255 |
| | | activation=linear |
| | | |
| | | |
| | | [yolo] |
| | | mask = 0,1,2 |
| | | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 |
| | | classes=80 |
| | | num=9 |
| | | jitter=.3 |
| | | ignore_thresh = .5 |
| | | truth_thresh = 1 |
| | | random=1 |
| | | |
| | |
| | | rem Run this file and then open this URL in Chrome/Firefox: http://localhost:8090 |
| | | rem Or open: http://ip-address:8090 |
| | | |
| | | darknet.exe detector demo data/voc.data yolo-voc.cfg yolo-voc.weights test.mp4 -i 0 -http_port 8090 |
| | | darknet.exe detector demo data/voc.data yolo-voc.cfg yolo-voc.weights test.mp4 -i 0 -http_port 8090 -dont_show |
| | | |
| | | |
| | | pause |
| New file |
| | |
| | | |
| | | darknet.exe detector test data/coco.data yolov3.cfg yolov3.weights -i 0 -thresh 0.25 dogr.jpg |
| | | |
| | | |
| | | pause |
| New file |
| | |
| | | [net] |
| | | # Testing |
| | | batch=1 |
| | | subdivisions=1 |
| | | # Training |
| | | # batch=64 |
| | | # subdivisions=16 |
| | | width=416 |
| | | height=416 |
| | | channels=3 |
| | | momentum=0.9 |
| | | decay=0.0005 |
| | | angle=0 |
| | | saturation = 1.5 |
| | | exposure = 1.5 |
| | | hue=.1 |
| | | |
| | | learning_rate=0.001 |
| | | burn_in=1000 |
| | | max_batches = 500200 |
| | | policy=steps |
| | | steps=400000,450000 |
| | | scales=.1,.1 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=32 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=32 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | ###################### |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=1024 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=1024 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=1024 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | filters=255 |
| | | activation=linear |
| | | |
| | | |
| | | [yolo] |
| | | mask = 6,7,8 |
| | | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 |
| | | classes=80 |
| | | num=9 |
| | | jitter=.3 |
| | | ignore_thresh = .5 |
| | | truth_thresh = 1 |
| | | random=1 |
| | | |
| | | |
| | | [route] |
| | | layers = -4 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [upsample] |
| | | stride=2 |
| | | |
| | | [route] |
| | | layers = -1, 61 |
| | | |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=512 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=512 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=512 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | filters=255 |
| | | activation=linear |
| | | |
| | | |
| | | [yolo] |
| | | mask = 3,4,5 |
| | | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 |
| | | classes=80 |
| | | num=9 |
| | | jitter=.3 |
| | | ignore_thresh = .5 |
| | | truth_thresh = 1 |
| | | random=1 |
| | | |
| | | |
| | | |
| | | [route] |
| | | layers = -4 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [upsample] |
| | | stride=2 |
| | | |
| | | [route] |
| | | layers = -1, 36 |
| | | |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=256 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=256 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=256 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | filters=255 |
| | | activation=linear |
| | | |
| | | |
| | | [yolo] |
| | | mask = 0,1,2 |
| | | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 |
| | | classes=80 |
| | | num=9 |
| | | jitter=.3 |
| | | ignore_thresh = .5 |
| | | truth_thresh = 1 |
| | | random=1 |
| | | |
| | |
| | | <ClCompile Include="..\..\src\swag.c" /> |
| | | <ClCompile Include="..\..\src\tag.c" /> |
| | | <ClCompile Include="..\..\src\tree.c" /> |
| | | <ClCompile Include="..\..\src\upsample_layer.c" /> |
| | | <ClCompile Include="..\..\src\utils.c" /> |
| | | <ClCompile Include="..\..\src\voxel.c" /> |
| | | <ClCompile Include="..\..\src\writing.c" /> |
| | | <ClCompile Include="..\..\src\yolo.c" /> |
| | | <ClCompile Include="..\..\src\yolo_layer.c" /> |
| | | <ClCompile Include="..\..\src\yolo_v2_class.cpp" /> |
| | | <ClCompile Include="..\..\src\yolo_v2_class.hpp" /> |
| | | </ItemGroup> |
| | |
| | | <ClInclude Include="..\..\src\stb_image_write.h" /> |
| | | <ClInclude Include="..\..\src\tree.h" /> |
| | | <ClInclude Include="..\..\src\unistd.h" /> |
| | | <ClInclude Include="..\..\src\upsample_layer.h" /> |
| | | <ClInclude Include="..\..\src\utils.h" /> |
| | | <ClInclude Include="..\..\src\yolo_layer.h" /> |
| | | </ItemGroup> |
| | | <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
| | | <ImportGroup Label="ExtensionTargets"> |
| | |
| | | <ClCompile Include="..\..\src\swag.c" /> |
| | | <ClCompile Include="..\..\src\tag.c" /> |
| | | <ClCompile Include="..\..\src\tree.c" /> |
| | | <ClCompile Include="..\..\src\upsample_layer.c" /> |
| | | <ClCompile Include="..\..\src\utils.c" /> |
| | | <ClCompile Include="..\..\src\voxel.c" /> |
| | | <ClCompile Include="..\..\src\writing.c" /> |
| | | <ClCompile Include="..\..\src\yolo.c" /> |
| | | <ClCompile Include="..\..\src\yolo_layer.c" /> |
| | | <ClCompile Include="..\..\src\yolo_v2_class.cpp" /> |
| | | <ClCompile Include="..\..\src\yolo_v2_class.hpp" /> |
| | | </ItemGroup> |
| | |
| | | <ClInclude Include="..\..\src\stb_image_write.h" /> |
| | | <ClInclude Include="..\..\src\tree.h" /> |
| | | <ClInclude Include="..\..\src\unistd.h" /> |
| | | <ClInclude Include="..\..\src\upsample_layer.h" /> |
| | | <ClInclude Include="..\..\src\utils.h" /> |
| | | <ClInclude Include="..\..\src\yolo_layer.h" /> |
| | | </ItemGroup> |
| | | <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" /> |
| | | <ImportGroup Label="ExtensionTargets" /> |
| New file |
| | |
| | | [net] |
| | | # Testing |
| | | batch=1 |
| | | subdivisions=1 |
| | | # Training |
| | | # batch=64 |
| | | # subdivisions=16 |
| | | width=416 |
| | | height=416 |
| | | channels=3 |
| | | momentum=0.9 |
| | | decay=0.0005 |
| | | angle=0 |
| | | saturation = 1.5 |
| | | exposure = 1.5 |
| | | hue=.1 |
| | | |
| | | learning_rate=0.001 |
| | | burn_in=1000 |
| | | max_batches = 500200 |
| | | policy=steps |
| | | steps=400000,450000 |
| | | scales=.1,.1 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=32 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=32 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | # Downsample |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=2 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [shortcut] |
| | | from=-3 |
| | | activation=linear |
| | | |
| | | ###################### |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=1024 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=1024 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=1024 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | filters=255 |
| | | activation=linear |
| | | |
| | | |
| | | [yolo] |
| | | mask = 6,7,8 |
| | | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 |
| | | classes=80 |
| | | num=9 |
| | | jitter=.3 |
| | | ignore_thresh = .5 |
| | | truth_thresh = 1 |
| | | random=1 |
| | | |
| | | |
| | | [route] |
| | | layers = -4 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [upsample] |
| | | stride=2 |
| | | |
| | | [route] |
| | | layers = -1, 61 |
| | | |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=512 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=512 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=512 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | filters=255 |
| | | activation=linear |
| | | |
| | | |
| | | [yolo] |
| | | mask = 3,4,5 |
| | | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 |
| | | classes=80 |
| | | num=9 |
| | | jitter=.3 |
| | | ignore_thresh = .5 |
| | | truth_thresh = 1 |
| | | random=1 |
| | | |
| | | |
| | | |
| | | [route] |
| | | layers = -4 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [upsample] |
| | | stride=2 |
| | | |
| | | [route] |
| | | layers = -1, 36 |
| | | |
| | | |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=256 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=256 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=256 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | size=1 |
| | | stride=1 |
| | | pad=1 |
| | | filters=255 |
| | | activation=linear |
| | | |
| | | |
| | | [yolo] |
| | | mask = 0,1,2 |
| | | anchors = 10,13, 16,30, 33,23, 30,61, 62,45, 59,119, 116,90, 156,198, 373,326 |
| | | classes=80 |
| | | num=9 |
| | | jitter=.3 |
| | | ignore_thresh = .5 |
| | | truth_thresh = 1 |
| | | random=1 |
| | | |
| New file |
| | |
| | | |
| | | |
| | | ./darknet detector test ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights data/dog.jpg -i 0 -thresh 0.25 |
| | | |
| | | |
| | | |
| | |
| | | } |
| | | } |
| | | |
| | | /* upsample_cpu: nearest-neighbour resampling between an input of batch*c*h*w floats */ |
| | | /* and an output of batch*c*(h*stride)*(w*stride) floats (batch, channel, row, column order). */ |
| | | /* forward != 0: every output cell is set to scale * the input cell it maps back to. */ |
| | | /* forward == 0: every output cell, times scale, is added into that input cell. */ |
| | | void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) |
| | | { |
| | |     const int out_w = w*stride; |
| | |     const int out_h = h*stride; |
| | |     int n, ch, y, x; |
| | |     for (n = 0; n < batch; ++n) { |
| | |         for (ch = 0; ch < c; ++ch) { |
| | |             /* start of the current (image, channel) plane in each buffer */ |
| | |             const int in_plane = (n*c + ch)*w*h; |
| | |             const int out_plane = (n*c + ch)*out_w*out_h; |
| | |             for (y = 0; y < out_h; ++y) { |
| | |                 const int in_row = in_plane + (y / stride)*w; |
| | |                 const int out_row = out_plane + y*out_w; |
| | |                 for (x = 0; x < out_w; ++x) { |
| | |                     const int src = in_row + x / stride; |
| | |                     const int dst = out_row + x; |
| | |                     if (!forward) { |
| | |                         in[src] += scale*out[dst]; |
| | |                     } |
| | |                     else { |
| | |                         out[dst] = scale*in[src]; |
| | |                     } |
| | |                 } |
| | |             } |
| | |         } |
| | |     } |
| | | } |
| | |
| | | void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); |
| | | |
| | | void softmax(float *input, int n, float temp, float *output, int stride); |
| | | void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); |
| | | |
| | | #ifdef GPU |
| | | #include "cuda.h" |
| | |
| | | |
| | | void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out); |
| | | |
| | | void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); |
| | | |
| | | #endif |
| | | #endif |
| | |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | | |
| | | |
| | | /* upsample_kernel: one thread per output element; threads with a flat id >= N exit at once. */ |
| | | /* The flat id is built from a 2D grid (blockIdx.x, blockIdx.y) of 1D blocks. */ |
| | | /* forward != 0: out[id] += scale * x[source]  (accumulates, so out keeps whatever it held before). */ |
| | | /* forward == 0: x[source] += scale * out[id], via atomicAdd because stride*stride threads share a source. */ |
| | | __global__ void upsample_kernel(size_t N, float *x, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) |
| | | { |
| | |     const size_t tid = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x; |
| | |     if (tid >= N) return; |
| | |  |
| | |     const int dst = tid; |
| | |  |
| | |     /* peel column, row, channel and image number off the flat output index */ |
| | |     size_t rest = tid; |
| | |     const int col = rest % (w*stride); |
| | |     rest = rest / (w*stride); |
| | |     const int row = rest % (h*stride); |
| | |     rest = rest / (h*stride); |
| | |     const int chan = rest % c; |
| | |     rest = rest / c; |
| | |     const int img = rest % batch; |
| | |  |
| | |     /* matching element of the smaller buffer */ |
| | |     const int src = img*w*h*c + chan*w*h + (row / stride)*w + (col / stride); |
| | |  |
| | |     if (forward) { |
| | |         out[dst] += scale * x[src]; |
| | |     } |
| | |     else { |
| | |         atomicAdd(x + src, scale * out[dst]); |
| | |     } |
| | | } |
| | | |
| | | /* upsample_gpu: launches upsample_kernel with one thread per element of the larger buffer */ |
| | | /* (w*h*c*batch*stride*stride elements) and then checks for a pending CUDA error. */ |
| | | /* in / out, forward and scale are passed straight through to the kernel. */ |
| | | extern "C" void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out) |
| | | { |
| | |     /* Widen the first factor so the whole product is evaluated in size_t, not int; */ |
| | |     /* the int product could overflow before being stored for large layers. */ |
| | |     size_t size = (size_t)w*h*c*batch*stride*stride; |
| | |     upsample_kernel<<<cuda_gridsize(size), BLOCK>>>(size, in, w, h, c, batch, stride, forward, scale, out); |
| | |     check_error(cudaPeekAtLastError()); |
| | | } |
| | |
| | | free(s); |
| | | } |
| | | |
| | | /* nms_comparator_v3: qsort comparator that orders detections from highest to lowest score. */ |
| | | /* The score is prob[sort_class] when the second element's sort_class is >= 0, */ |
| | | /* otherwise objectness. Returns -1, 0 or 1. */ |
| | | int nms_comparator_v3(const void *pa, const void *pb) |
| | | { |
| | |     const detection *lhs = (const detection *)pa; |
| | |     const detection *rhs = (const detection *)pb; |
| | |     const int cls = rhs->sort_class; |
| | |     float diff; |
| | |  |
| | |     if (cls < 0) { |
| | |         diff = lhs->objectness - rhs->objectness; |
| | |     } |
| | |     else { |
| | |         diff = lhs->prob[cls] - rhs->prob[cls]; |
| | |     } |
| | |  |
| | |     if (diff > 0) return -1; |
| | |     if (diff < 0) return 1; |
| | |     return 0; |
| | | } |
| | | |
| | | /* do_nms_obj_v3: non-maximum suppression keyed on objectness. */ |
| | | /* 1. Detections whose objectness is 0 are swapped to the tail and left out of the rest. */ |
| | | /* 2. The remaining ones are sorted by descending objectness (sort_class = -1). */ |
| | | /* 3. Any later detection whose box_iou with a surviving earlier one exceeds thresh gets */ |
| | | /*    objectness 0 and all `classes` entries of prob[] zeroed. */ |
| | | void do_nms_obj_v3(detection *dets, int total, int classes, float thresh) |
| | | { |
| | |     int head = 0; |
| | |     int tail = total - 1; |
| | |     int other, cls, kept; |
| | |  |
| | |     while (head <= tail) { |
| | |         if (dets[head].objectness != 0) { |
| | |             ++head; |
| | |             continue; |
| | |         } |
| | |         /* move the empty slot to the tail; the swapped-in element is examined next */ |
| | |         detection tmp = dets[head]; |
| | |         dets[head] = dets[tail]; |
| | |         dets[tail] = tmp; |
| | |         --tail; |
| | |     } |
| | |     kept = tail + 1; |
| | |  |
| | |     for (head = 0; head < kept; ++head) { |
| | |         dets[head].sort_class = -1; |
| | |     } |
| | |     qsort(dets, kept, sizeof(detection), nms_comparator_v3); |
| | |  |
| | |     for (head = 0; head < kept; ++head) { |
| | |         if (dets[head].objectness == 0) continue; |
| | |         box winner = dets[head].bbox; |
| | |         for (other = head + 1; other < kept; ++other) { |
| | |             if (dets[other].objectness == 0) continue; |
| | |             if (box_iou(winner, dets[other].bbox) > thresh) { |
| | |                 dets[other].objectness = 0; |
| | |                 for (cls = 0; cls < classes; ++cls) { |
| | |                     dets[other].prob[cls] = 0; |
| | |                 } |
| | |             } |
| | |         } |
| | |     } |
| | | } |
| | | |
| | | /* do_nms_sort_v3: per-class non-maximum suppression. */ |
| | | /* Detections with objectness 0 are first swapped to the tail and ignored. Then, for each */ |
| | | /* class, the rest are sorted by descending prob[class]; every detection with a non-zero */ |
| | | /* score zeroes prob[class] of each later detection whose box_iou with it exceeds thresh. */ |
| | | void do_nms_sort_v3(detection *dets, int total, int classes, float thresh) |
| | | { |
| | |     int head = 0; |
| | |     int tail = total - 1; |
| | |     int other, cls, kept; |
| | |  |
| | |     while (head <= tail) { |
| | |         if (dets[head].objectness != 0) { |
| | |             ++head; |
| | |             continue; |
| | |         } |
| | |         /* move the empty slot to the tail; the swapped-in element is examined next */ |
| | |         detection tmp = dets[head]; |
| | |         dets[head] = dets[tail]; |
| | |         dets[tail] = tmp; |
| | |         --tail; |
| | |     } |
| | |     kept = tail + 1; |
| | |  |
| | |     for (cls = 0; cls < classes; ++cls) { |
| | |         for (head = 0; head < kept; ++head) { |
| | |             dets[head].sort_class = cls; |
| | |         } |
| | |         qsort(dets, kept, sizeof(detection), nms_comparator_v3); |
| | |  |
| | |         for (head = 0; head < kept; ++head) { |
| | |             if (dets[head].prob[cls] == 0) continue; |
| | |             box winner = dets[head].bbox; |
| | |             for (other = head + 1; other < kept; ++other) { |
| | |                 if (box_iou(winner, dets[other].bbox) > thresh) { |
| | |                     dets[other].prob[cls] = 0; |
| | |                 } |
| | |             } |
| | |         } |
| | |     } |
| | | } |
| | | |
| | | void do_nms(box *boxes, float **probs, int total, int classes, float thresh) |
| | | { |
| | | int i, j, k; |
| | |
| | | float dx, dy, dw, dh; |
| | | } dbox; |
| | | |
| | | typedef struct detection { /* one candidate detection, as consumed by the _v3 NMS routines */ |
| | | box bbox; /* box handed to box_iou() when two detections are compared */ |
| | | int classes; /* NOTE(review): presumably the length of prob[] - not read in the visible code, confirm */ |
| | | float *prob; /* per-class scores indexed by class id; NMS zeroes suppressed entries */ |
| | | float *mask; /* NOTE(review): not referenced in the visible code - purpose to be confirmed */ |
| | | float objectness; /* sort key when sort_class < 0; the NMS routines treat 0 as "discarded" */ |
| | | int sort_class; /* class index nms_comparator_v3 sorts by; a negative value selects objectness */ |
| | | } detection; |
| | | |
| | | box float_to_box(float *f); |
| | | float box_iou(box a, box b); |
| | | float box_rmse(box a, box b); |
| | | dbox diou(box a, box b); |
| | | void do_nms(box *boxes, float **probs, int total, int classes, float thresh); |
| | | void do_nms_sort(box *boxes, float **probs, int total, int classes, float thresh); |
| | | void do_nms_sort_v3(detection *dets, int total, int classes, float thresh); |
| | | void do_nms_obj_v3(detection *dets, int total, int classes, float thresh); |
| | | box decode_box(box b, box anchor); |
| | | box encode_box(box b, box anchor); |
| | | |
| | |
| | | static float *avg; |
| | | |
| | | void draw_detections_cv(IplImage* show_img, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes); |
| | | void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes); |
| | | void show_image_cv_ipl(IplImage *disp, const char *name); |
| | | image get_image_from_stream_resize(CvCapture *cap, int w, int h, IplImage** in_img, int use_webcam); |
| | | IplImage* in_img; |
| | |
| | | |
| | | void *detect_in_thread(void *ptr) |
| | | { |
| | | float nms = .4; |
| | | float nms = .45; // 0.4F |
| | | |
| | | layer l = net.layers[net.n-1]; |
| | | float *X = det_s.data; |
| | |
| | | l.output = avg; |
| | | |
| | | free_image(det_s); |
| | | /* |
| | | if(l.type == DETECTION){ |
| | | get_detection_boxes(l, 1, 1, demo_thresh, probs, boxes, 0); |
| | | } else if (l.type == REGION){ |
| | |
| | | error("Last layer must produce detections\n"); |
| | | } |
| | | if (nms > 0) do_nms(boxes, probs, l.w*l.h*l.n, l.classes, nms); |
| | | */ |
| | | int letter = 0; |
| | | int nboxes = 0; |
| | | detection *dets = get_network_boxes(&net, det.w, det.h, demo_thresh, demo_thresh, 0, 1, &nboxes, letter); |
| | | if (nms) do_nms_obj_v3(dets, nboxes, l.classes, nms); |
| | | |
| | | printf("\033[2J"); |
| | | printf("\033[1;1H"); |
| | | printf("\nFPS:%.1f\n",fps); |
| | |
| | | demo_index = (demo_index + 1)%FRAMES; |
| | | |
| | | //draw_detections(det, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes); |
| | | draw_detections_cv(det_img, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes); |
| | | draw_detections_cv_v3(det_img, dets, nboxes, demo_thresh, demo_names, demo_alphabet, demo_classes); |
| | | //draw_detections_cv(det_img, l.w*l.h*l.n, demo_thresh, boxes, probs, demo_names, demo_alphabet, demo_classes); |
| | | free(dets); |
| | | |
| | | return 0; |
| | | } |
| | |
| | | return (double)time.tv_sec + (double)time.tv_usec * .000001; |
| | | } |
| | | |
| | | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, |
| | | void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, |
| | | int frame_skip, char *prefix, char *out_filename, int http_stream_port, int dont_show) |
| | | { |
| | | //skip = frame_skip; |
| | |
| | | } |
| | | } |
| | | #else |
| | | void demo(char *cfgfile, char *weightfile, float thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, char *out_filename, int http_stream_port, int dont_show) |
| | | void demo(char *cfgfile, char *weightfile, float thresh, float hier_thresh, int cam_index, const char *filename, char **names, int classes, int frame_skip, char *prefix, char *out_filename, int http_stream_port, int dont_show) |
| | | { |
| | | fprintf(stderr, "Demo needs OpenCV for webcam images.\n"); |
| | | } |
| | |
| | | } |
| | | #endif |
| | | |
| | | /* get_detection_detections: decodes l.output into dets[], one entry per (grid cell, box slot). */ |
| | | /* Layout read from l.output, with cells = l.side*l.side: */ |
| | | /*   class scores at [cell*l.classes + class] */ |
| | | /*   box confidence at [cells*l.classes + cell*l.n + slot] */ |
| | | /*   box x,y,w,h at [cells*(l.classes + l.n) + (cell*l.n + slot)*4 ...] */ |
| | | /* x/y are offset by the cell column/row, divided by l.side and scaled by w/h; w/h are squared */ |
| | | /* first when l.sqrt is set. prob[class] = confidence*score if that exceeds thresh, else 0. */ |
| | | void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) |
| | | { |
| | |     const float *pred = l.output; |
| | |     const int cells = l.side*l.side; |
| | |     const int exponent = l.sqrt ? 2 : 1; |
| | |     int cell, slot, cls; |
| | |  |
| | |     for (cell = 0; cell < cells; ++cell) { |
| | |         const int row = cell / l.side; |
| | |         const int col = cell % l.side; |
| | |         for (slot = 0; slot < l.n; ++slot) { |
| | |             const int index = cell*l.n + slot; |
| | |             const float confidence = pred[cells*l.classes + index]; |
| | |             const float *coords = pred + cells*(l.classes + l.n) + index*4; |
| | |             const float *scores = pred + cell*l.classes; |
| | |             box b; |
| | |  |
| | |             b.x = (coords[0] + col) / l.side * w; |
| | |             b.y = (coords[1] + row) / l.side * h; |
| | |             b.w = pow(coords[2], exponent) * w; |
| | |             b.h = pow(coords[3], exponent) * h; |
| | |  |
| | |             dets[index].bbox = b; |
| | |             dets[index].objectness = confidence; |
| | |             for (cls = 0; cls < l.classes; ++cls) { |
| | |                 const float p = confidence*scores[cls]; |
| | |                 dets[index].prob[cls] = (p > thresh) ? p : 0; |
| | |             } |
| | |         } |
| | |     } |
| | | } |
| | |
| | | void forward_detection_layer(const detection_layer l, network_state state); |
| | | void backward_detection_layer(const detection_layer l, network_state state); |
| | | void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness); |
| | | void get_detection_detections(layer l, int w, int h, float thresh, detection *dets); |
| | | |
| | | #ifdef GPU |
| | | void forward_detection_layer_gpu(const detection_layer l, network_state state); |
| | |
| | | } |
| | | #endif // OPENCV |
| | | |
| | | void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, int dont_show) |
| | | void test_detector(char *datacfg, char *cfgfile, char *weightfile, char *filename, float thresh, float hier_thresh, int dont_show) |
| | | { |
| | | list *options = read_data_cfg(datacfg); |
| | | char *name_list = option_find_str(options, "names", "data/names.list"); |
| | |
| | | char buff[256]; |
| | | char *input = buff; |
| | | int j; |
| | | float nms=.4; |
| | | float nms=.45; // 0.4F |
| | | while(1){ |
| | | if(filename){ |
| | | strncpy(input, filename, 256); |
| | |
| | | strtok(input, "\n"); |
| | | } |
| | | image im = load_image_color(input,0,0); |
| | | image sized = resize_image(im, net.w, net.h); |
| | | //image sized = letterbox_image(im, net.w, net.h); |
| | | int letter = 0; |
| | | //image sized = resize_image(im, net.w, net.h); |
| | | image sized = letterbox_image(im, net.w, net.h); letter = 1; |
| | | layer l = net.layers[net.n-1]; |
| | | |
| | | box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); |
| | | float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); |
| | | for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); |
| | | //box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); |
| | | //float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); |
| | | //for(j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); |
| | | |
| | | float *X = sized.data; |
| | | time=clock(); |
| | | network_predict(net, X); |
| | | printf("%s: Predicted in %f seconds.\n", input, sec(clock()-time)); |
| | | get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0); |
| | | if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms); |
| | | draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes); |
| | | //get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, 0); |
| | | // if (nms) do_nms_sort(boxes, probs, l.w*l.h*l.n, l.classes, nms); |
| | | //draw_detections(im, l.w*l.h*l.n, thresh, boxes, probs, names, alphabet, l.classes); |
| | | int nboxes = 0; |
| | | detection *dets = get_network_boxes(&net, im.w, im.h, thresh, hier_thresh, 0, 1, &nboxes, letter); |
| | | if (nms) do_nms_sort_v3(dets, nboxes, l.classes, nms); |
| | | draw_detections_v3(im, dets, nboxes, thresh, names, alphabet, l.classes); |
| | | free_detections(dets, nboxes); |
| | | save_image(im, "predictions"); |
| | | if (!dont_show) { |
| | | show_image(im, "predictions"); |
| | |
| | | |
| | | free_image(im); |
| | | free_image(sized); |
| | | free(boxes); |
| | | free_ptrs((void **)probs, l.w*l.h*l.n); |
| | | //free(boxes); |
| | | //free_ptrs((void **)probs, l.w*l.h*l.n); |
| | | #ifdef OPENCV |
| | | if (!dont_show) { |
| | | cvWaitKey(0); |
| | |
| | | int http_stream_port = find_int_arg(argc, argv, "-http_port", -1); |
| | | char *out_filename = find_char_arg(argc, argv, "-out_filename", 0); |
| | | char *prefix = find_char_arg(argc, argv, "-prefix", 0); |
| | | float thresh = find_float_arg(argc, argv, "-thresh", .24); |
| | | float thresh = find_float_arg(argc, argv, "-thresh", .25); // 0.24 |
| | | float hier_thresh = find_float_arg(argc, argv, "-hier", .5); |
| | | int cam_index = find_int_arg(argc, argv, "-c", 0); |
| | | int frame_skip = find_int_arg(argc, argv, "-s", 0); |
| | | int num_of_clusters = find_int_arg(argc, argv, "-num_of_clusters", 5); |
| | |
| | | if(weights) |
| | | if (weights[strlen(weights) - 1] == 0x0d) weights[strlen(weights) - 1] = 0; |
| | | char *filename = (argc > 6) ? argv[6]: 0; |
| | | if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, dont_show); |
| | | if(0==strcmp(argv[2], "test")) test_detector(datacfg, cfg, weights, filename, thresh, hier_thresh, dont_show); |
| | | else if(0==strcmp(argv[2], "train")) train_detector(datacfg, cfg, weights, gpus, ngpus, clear, dont_show); |
| | | else if(0==strcmp(argv[2], "valid")) validate_detector(datacfg, cfg, weights); |
| | | else if(0==strcmp(argv[2], "recall")) validate_detector_recall(datacfg, cfg, weights); |
| | |
| | | char **names = get_labels(name_list); |
| | | if(filename) |
| | | if (filename[strlen(filename) - 1] == 0x0d) filename[strlen(filename) - 1] = 0; |
| | | demo(cfg, weights, thresh, cam_index, filename, names, classes, frame_skip, prefix, out_filename, |
| | | demo(cfg, weights, thresh, hier_thresh, cam_index, filename, names, classes, frame_skip, prefix, out_filename, |
| | | http_stream_port, dont_show); |
| | | } |
| | | } |
| | |
| | | return b; |
| | | } |
| | | |
| | | image get_label_v3(image **characters, char *string, int size) |
| | | { |
| | | size = size / 10; |
| | | if (size > 7) size = 7; |
| | | image label = make_empty_image(0, 0, 0); |
| | | while (*string) { |
| | | image l = characters[size][(int)*string]; |
| | | image n = tile_images(label, l, -size - 1 + (size + 1) / 2); |
| | | free_image(label); |
| | | label = n; |
| | | ++string; |
| | | } |
| | | image b = border_image(label, label.h*.25); |
| | | free_image(label); |
| | | return b; |
| | | } |
| | | |
| | | void draw_label(image a, int r, int c, image label, const float *rgb) |
| | | { |
| | | int w = label.w; |
| | |
| | | return alphabets; |
| | | } |
| | | |
| | | void draw_detections_v3(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes) |
| | | { |
| | | int i, j; |
| | | |
| | | for (i = 0; i < num; ++i) { |
| | | char labelstr[4096] = { 0 }; |
| | | int class_id = -1; |
| | | for (j = 0; j < classes; ++j) { |
| | | if (dets[i].prob[j] > thresh) { |
| | | if (class_id < 0) { |
| | | strcat(labelstr, names[j]); |
| | | class_id = j; |
| | | } |
| | | else { |
| | | strcat(labelstr, ", "); |
| | | strcat(labelstr, names[j]); |
| | | } |
| | | printf("%s: %.0f%%\n", names[j], dets[i].prob[j] * 100); |
| | | } |
| | | } |
| | | if (class_id >= 0) { |
| | | int width = im.h * .006; |
| | | |
| | | /* |
| | | if(0){ |
| | | width = pow(prob, 1./2.)*10+1; |
| | | alphabet = 0; |
| | | } |
| | | */ |
| | | |
| | | //printf("%d %s: %.0f%%\n", i, names[class_id], prob*100); |
| | | int offset = class_id * 123457 % classes; |
| | | float red = get_color(2, offset, classes); |
| | | float green = get_color(1, offset, classes); |
| | | float blue = get_color(0, offset, classes); |
| | | float rgb[3]; |
| | | |
| | | //width = prob*20+2; |
| | | |
| | | rgb[0] = red; |
| | | rgb[1] = green; |
| | | rgb[2] = blue; |
| | | box b = dets[i].bbox; |
| | | //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); |
| | | |
| | | int left = (b.x - b.w / 2.)*im.w; |
| | | int right = (b.x + b.w / 2.)*im.w; |
| | | int top = (b.y - b.h / 2.)*im.h; |
| | | int bot = (b.y + b.h / 2.)*im.h; |
| | | |
| | | if (left < 0) left = 0; |
| | | if (right > im.w - 1) right = im.w - 1; |
| | | if (top < 0) top = 0; |
| | | if (bot > im.h - 1) bot = im.h - 1; |
| | | |
| | | draw_box_width(im, left, top, right, bot, width, red, green, blue); |
| | | if (alphabet) { |
| | | image label = get_label_v3(alphabet, labelstr, (im.h*.03)); |
| | | draw_label(im, top + width, left, label, rgb); |
| | | free_image(label); |
| | | } |
| | | if (dets[i].mask) { |
| | | image mask = float_to_image(14, 14, 1, dets[i].mask); |
| | | image resized_mask = resize_image(mask, b.w*im.w, b.h*im.h); |
| | | image tmask = threshold_image(resized_mask, .5); |
| | | embed_image(tmask, im, left, top); |
| | | free_image(mask); |
| | | free_image(resized_mask); |
| | | free_image(tmask); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | |
| | | void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes) |
| | | { |
| | | int i; |
| | |
| | | } |
| | | |
| | | #ifdef OPENCV |
| | | |
| | | void draw_detections_cv_v3(IplImage* show_img, detection *dets, int num, float thresh, char **names, image **alphabet, int classes) |
| | | { |
| | | int i, j; |
| | | if (!show_img) return; |
| | | |
| | | for (i = 0; i < num; ++i) { |
| | | char labelstr[4096] = { 0 }; |
| | | int class_id = -1; |
| | | for (j = 0; j < classes; ++j) { |
| | | if (dets[i].prob[j] > thresh) { |
| | | if (class_id < 0) { |
| | | strcat(labelstr, names[j]); |
| | | class_id = j; |
| | | } |
| | | else { |
| | | strcat(labelstr, ", "); |
| | | strcat(labelstr, names[j]); |
| | | } |
| | | printf("%s: %.0f%%\n", names[j], dets[i].prob[j] * 100); |
| | | } |
| | | } |
| | | if (class_id >= 0) { |
| | | int width = show_img->height * .006; |
| | | |
| | | /* |
| | | if(0){ |
| | | width = pow(prob, 1./2.)*10+1; |
| | | alphabet = 0; |
| | | } |
| | | */ |
| | | |
| | | //printf("%d %s: %.0f%%\n", i, names[class_id], prob*100); |
| | | int offset = class_id * 123457 % classes; |
| | | float red = get_color(2, offset, classes); |
| | | float green = get_color(1, offset, classes); |
| | | float blue = get_color(0, offset, classes); |
| | | float rgb[3]; |
| | | |
| | | //width = prob*20+2; |
| | | |
| | | rgb[0] = red; |
| | | rgb[1] = green; |
| | | rgb[2] = blue; |
| | | box b = dets[i].bbox; |
| | | //printf("%f %f %f %f\n", b.x, b.y, b.w, b.h); |
| | | |
| | | int left = (b.x - b.w / 2.)*show_img->width; |
| | | int right = (b.x + b.w / 2.)*show_img->width; |
| | | int top = (b.y - b.h / 2.)*show_img->height; |
| | | int bot = (b.y + b.h / 2.)*show_img->height; |
| | | |
| | | if (left < 0) left = 0; |
| | | if (right > show_img->width - 1) right = show_img->width - 1; |
| | | if (top < 0) top = 0; |
| | | if (bot > show_img->height - 1) bot = show_img->height - 1; |
| | | |
| | | float const font_size = show_img->height / 1000.F; |
| | | CvPoint pt1, pt2, pt_text, pt_text_bg1, pt_text_bg2; |
| | | pt1.x = left; |
| | | pt1.y = top; |
| | | pt2.x = right; |
| | | pt2.y = bot; |
| | | pt_text.x = left; |
| | | pt_text.y = top - 12; |
| | | pt_text_bg1.x = left; |
| | | pt_text_bg1.y = top - (10 + 25 * font_size); |
| | | pt_text_bg2.x = right; |
| | | pt_text_bg2.y = top; |
| | | CvScalar color; |
| | | color.val[0] = red * 256; |
| | | color.val[1] = green * 256; |
| | | color.val[2] = blue * 256; |
| | | |
| | | cvRectangle(show_img, pt1, pt2, color, width, 8, 0); |
| | | //printf("left=%d, right=%d, top=%d, bottom=%d, obj_id=%d, obj=%s \n", left, right, top, bot, class_id, names[class_id]); |
| | | cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, width, 8, 0); |
| | | cvRectangle(show_img, pt_text_bg1, pt_text_bg2, color, CV_FILLED, 8, 0); // filled |
| | | CvScalar black_color; |
| | | black_color.val[0] = 0; |
| | | CvFont font; |
| | | cvInitFont(&font, CV_FONT_HERSHEY_SIMPLEX, font_size, font_size, 0, font_size * 3, 8); |
| | | cvPutText(show_img, names[class_id], pt_text, &font, black_color); |
| | | } |
| | | } |
| | | } |
| | | |
| | | void draw_detections_cv(IplImage* show_img, int num, float thresh, box *boxes, float **probs, char **names, image **alphabet, int classes) |
| | | { |
| | | int i; |
| | |
| | | void draw_label(image a, int r, int c, image label, const float *rgb); |
| | | void write_label(image a, int r, int c, image *characters, char *string, float *rgb); |
| | | void draw_detections(image im, int num, float thresh, box *boxes, float **probs, char **names, image **labels, int classes); |
| | | void draw_detections_v3(image im, detection *dets, int num, float thresh, char **names, image **alphabet, int classes); |
| | | image image_distance(image a, image b); |
| | | void scale_image(image m, float s); |
| | | image crop_image(image im, int dx, int dy, int w, int h); |
| | |
| | | NETWORK, |
| | | XNOR, |
| | | REGION, |
| | | YOLO, |
| | | REORG, |
| | | UPSAMPLE, |
| | | REORG_OLD, |
| | | BLANK |
| | | } LAYER_TYPE; |
| | |
| | | int noadjust; |
| | | int reorg; |
| | | int log; |
| | | int tanh; |
| | | int *mask; |
| | | int total; |
| | | |
| | | int adam; |
| | | float B1; |
| | |
| | | float class_scale; |
| | | int bias_match; |
| | | int random; |
| | | float ignore_thresh; |
| | | float truth_thresh; |
| | | float thresh; |
| | | float focus; |
| | | int classfix; |
| | | int absolute; |
| | | |
| | |
| | | #include "dropout_layer.h" |
| | | #include "route_layer.h" |
| | | #include "shortcut_layer.h" |
| | | #include "yolo_layer.h" |
| | | |
| | | int get_current_batch(network net) |
| | | { |
| | |
| | | return out; |
| | | } |
| | | |
| | | int num_detections(network *net, float thresh) |
| | | { |
| | | int i; |
| | | int s = 0; |
| | | for (i = 0; i < net->n; ++i) { |
| | | layer l = net->layers[i]; |
| | | if (l.type == YOLO) { |
| | | s += yolo_num_detections(l, thresh); |
| | | } |
| | | if (l.type == DETECTION || l.type == REGION) { |
| | | s += l.w*l.h*l.n; |
| | | } |
| | | } |
| | | return s; |
| | | } |
| | | |
| | | detection *make_network_boxes(network *net, float thresh, int *num) |
| | | { |
| | | layer l = net->layers[net->n - 1]; |
| | | int i; |
| | | int nboxes = num_detections(net, thresh); |
| | | if (num) *num = nboxes; |
| | | detection *dets = calloc(nboxes, sizeof(detection)); |
| | | for (i = 0; i < nboxes; ++i) { |
| | | dets[i].prob = calloc(l.classes, sizeof(float)); |
| | | if (l.coords > 4) { |
| | | dets[i].mask = calloc(l.coords - 4, sizeof(float)); |
| | | } |
| | | } |
| | | return dets; |
| | | } |
| | | |
| | | |
| | | void custom_get_region_detections(layer l, int w, int h, int net_w, int net_h, float thresh, int *map, float hier, int relative, detection *dets, int letter) |
| | | { |
| | | box *boxes = calloc(l.w*l.h*l.n, sizeof(box)); |
| | | float **probs = calloc(l.w*l.h*l.n, sizeof(float *)); |
| | | int i, j; |
| | | for (j = 0; j < l.w*l.h*l.n; ++j) probs[j] = calloc(l.classes, sizeof(float *)); |
| | | get_region_boxes(l, 1, 1, thresh, probs, boxes, 0, map); |
| | | for (j = 0; j < l.w*l.h*l.n; ++j) { |
| | | dets[j].classes = l.classes; |
| | | dets[j].bbox = boxes[j]; |
| | | dets[j].objectness = 1; |
| | | for (i = 0; i < l.classes; ++i) dets[j].prob[i] = probs[j][i]; |
| | | } |
| | | |
| | | free(boxes); |
| | | free_ptrs((void **)probs, l.w*l.h*l.n); |
| | | } |
| | | |
| | | void fill_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, detection *dets, int letter) |
| | | { |
| | | int j; |
| | | for (j = 0; j < net->n; ++j) { |
| | | layer l = net->layers[j]; |
| | | if (l.type == YOLO) { |
| | | int count = get_yolo_detections(l, w, h, net->w, net->h, thresh, map, relative, dets, letter); |
| | | dets += count; |
| | | } |
| | | if (l.type == REGION) { |
| | | custom_get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets, letter); |
| | | //get_region_detections(l, w, h, net->w, net->h, thresh, map, hier, relative, dets); |
| | | dets += l.w*l.h*l.n; |
| | | } |
| | | if (l.type == DETECTION) { |
| | | get_detection_detections(l, w, h, thresh, dets); |
| | | dets += l.w*l.h*l.n; |
| | | } |
| | | } |
| | | } |
| | | |
| | | detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter) |
| | | { |
| | | detection *dets = make_network_boxes(net, thresh, num); |
| | | fill_network_boxes(net, w, h, thresh, hier, map, relative, dets, letter); |
| | | return dets; |
| | | } |
| | | |
| | | void free_detections(detection *dets, int n) |
| | | { |
| | | int i; |
| | | for (i = 0; i < n; ++i) { |
| | | free(dets[i].prob); |
| | | if (dets[i].mask) free(dets[i].mask); |
| | | } |
| | | free(dets); |
| | | } |
| | | |
| | | float *network_predict_image(network *net, image im) |
| | | { |
| | | image imr = letterbox_image(im, net->w, net->h); |
| | | set_batch_network(net, 1); |
| | | float *p = network_predict(*net, imr.data); |
| | | free_image(imr); |
| | | return p; |
| | | } |
| | | |
| | | int network_width(network *net) { return net->w; } |
| | | int network_height(network *net) { return net->h; } |
| | | |
| | | matrix network_predict_data_multi(network net, data test, int n) |
| | | { |
| | | int i,j,b,m; |
| | |
| | | void set_batch_network(network *net, int b); |
| | | int get_network_input_size(network net); |
| | | float get_network_cost(network net); |
| | | detection *get_network_boxes(network *net, int w, int h, float thresh, float hier, int *map, int relative, int *num, int letter); |
| | | |
| | | int get_network_nuisance(network net); |
| | | int get_network_background(network net); |
| | |
| | | #include "shortcut_layer.h" |
| | | #include "softmax_layer.h" |
| | | #include "utils.h" |
| | | #include "upsample_layer.h" |
| | | #include "yolo_layer.h" |
| | | #include <stdint.h> |
| | | |
| | | typedef struct{ |
| | |
| | | if (strcmp(type, "[cost]")==0) return COST; |
| | | if (strcmp(type, "[detection]")==0) return DETECTION; |
| | | if (strcmp(type, "[region]")==0) return REGION; |
| | | if (strcmp(type, "[yolo]") == 0) return YOLO; |
| | | if (strcmp(type, "[local]")==0) return LOCAL; |
| | | if (strcmp(type, "[conv]")==0 |
| | | || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL; |
| | |
| | | if (strcmp(type, "[soft]")==0 |
| | | || strcmp(type, "[softmax]")==0) return SOFTMAX; |
| | | if (strcmp(type, "[route]")==0) return ROUTE; |
| | | if (strcmp(type, "[upsample]") == 0) return UPSAMPLE; |
| | | return BLANK; |
| | | } |
| | | |
| | |
| | | return layer; |
| | | } |
| | | |
/*
 * Parses a comma-separated list of integers (e.g. "0,1,2") into a newly
 * allocated int array.
 *
 * a:   list text, or NULL.
 * num: receives the number of parsed values; left untouched when `a` is NULL.
 *
 * Returns the array (caller owns it) or 0 when `a` is NULL. The count is the
 * number of commas plus one; each value is converted with atoi().
 */
int *parse_yolo_mask(char *a, int *num)
{
    int *mask = 0;
    if (a) {
        int n = 1;
        int i;
        const char *cursor;

        for (cursor = a; *cursor; ++cursor) {
            if (*cursor == ',') ++n;
        }
        mask = calloc(n, sizeof(int));
        cursor = a;
        for (i = 0; i < n && cursor; ++i) {
            mask[i] = atoi(cursor);
            // strchr returns NULL after the last value; only step past a comma
            // that actually exists instead of computing NULL + 1.
            cursor = strchr(cursor, ',');
            if (cursor) ++cursor;
        }
        *num = n;
    }
    return mask;
}
| | | |
| | | layer parse_yolo(list *options, size_params params) |
| | | { |
| | | int classes = option_find_int(options, "classes", 20); |
| | | int total = option_find_int(options, "num", 1); |
| | | int num = total; |
| | | |
| | | char *a = option_find_str(options, "mask", 0); |
| | | int *mask = parse_yolo_mask(a, &num); |
| | | layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes); |
| | | assert(l.outputs == params.inputs); |
| | | |
| | | l.max_boxes = option_find_int_quiet(options, "max", 90); |
| | | l.jitter = option_find_float(options, "jitter", .2); |
| | | |
| | | l.ignore_thresh = option_find_float(options, "ignore_thresh", .5); |
| | | l.truth_thresh = option_find_float(options, "truth_thresh", 1); |
| | | l.random = option_find_int_quiet(options, "random", 0); |
| | | |
| | | char *map_file = option_find_str(options, "map", 0); |
| | | if (map_file) l.map = read_map(map_file); |
| | | |
| | | a = option_find_str(options, "anchors", 0); |
| | | if (a) { |
| | | int len = strlen(a); |
| | | int n = 1; |
| | | int i; |
| | | for (i = 0; i < len; ++i) { |
| | | if (a[i] == ',') ++n; |
| | | } |
| | | for (i = 0; i < n; ++i) { |
| | | float bias = atof(a); |
| | | l.biases[i] = bias; |
| | | a = strchr(a, ',') + 1; |
| | | } |
| | | } |
| | | return l; |
| | | } |
| | | |
| | | layer parse_region(list *options, size_params params) |
| | | { |
| | | int coords = option_find_int(options, "coords", 4); |
| | |
| | | return l; |
| | | } |
| | | |
| | | layer parse_upsample(list *options, size_params params, network net) |
| | | { |
| | | |
| | | int stride = option_find_int(options, "stride", 2); |
| | | layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride); |
| | | l.scale = option_find_float_quiet(options, "scale", 1); |
| | | return l; |
| | | } |
| | | |
| | | route_layer parse_route(list *options, size_params params, network net) |
| | | { |
| | | char *l = option_find(options, "layers"); |
| | |
| | | l = parse_cost(options, params); |
| | | }else if(lt == REGION){ |
| | | l = parse_region(options, params); |
| | | }else if (lt == YOLO) { |
| | | l = parse_yolo(options, params); |
| | | }else if(lt == DETECTION){ |
| | | l = parse_detection(options, params); |
| | | }else if(lt == SOFTMAX){ |
| | |
| | | l = parse_avgpool(options, params); |
| | | }else if(lt == ROUTE){ |
| | | l = parse_route(options, params, net); |
| | | }else if (lt == UPSAMPLE) { |
| | | l = parse_upsample(options, params, net); |
| | | }else if(lt == SHORTCUT){ |
| | | l = parse_shortcut(options, params, net); |
| | | }else if(lt == DROPOUT){ |
| | |
| | | } else { |
| | | // Focal loss |
| | | if (focal_loss) { |
| | | // Focal Loss for Dense Object Detection: http://blog.csdn.net/linmingan/article/details/77885832 |
| | | // Focal Loss |
| | | float alpha = 0.5; // 0.25 or 0.5 |
| | | //float gamma = 2; // hardcoded in many places of the grad-formula |
| | | |
| | | int ti = index + class_id; |
| | | float grad = -2 * (1 - output[ti])*logf(fmaxf(output[ti], 0.0000001))*output[ti] + (1 - output[ti])*(1 - output[ti]); |
| | | float pt = output[ti] + 0.000000000000001F; |
| | | //float grad = -(1 - pt) * (2 * pt*logf(pt) + pt - 1); // http://blog.csdn.net/linmingan/article/details/77885832 |
| | | float grad = (1 - pt) * (2 * pt*logf(pt) + pt - 1); // https://github.com/unsky/focal-loss |
| | | |
| | | for (n = 0; n < classes; ++n) { |
| | | delta[index + n] = scale * (((n == class_id) ? 1 : 0) - output[index + n]); |
| | |
| | | return (x != x); |
| | | } |
| | | |
| | | static int entry_index(layer l, int batch, int location, int entry) |
| | | { |
| | | int n = location / (l.w*l.h); |
| | | int loc = location % (l.w*l.h); |
| | | return batch*l.outputs + n*l.w*l.h*(l.coords + l.classes + 1) + entry*l.w*l.h + loc; |
| | | } |
| | | |
| | | void softmax_tree(float *input, int batch, int inputs, float temp, tree *hierarchy, float *output); |
| | | void forward_region_layer(const region_layer l, network_state state) |
| | | { |
| | |
| | | } |
| | | #endif |
| | | |
| | | |
| | | void correct_region_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative) |
| | | { |
| | | int i; |
| | | int new_w = 0; |
| | | int new_h = 0; |
| | | if (((float)netw / w) < ((float)neth / h)) { |
| | | new_w = netw; |
| | | new_h = (h * netw) / w; |
| | | } |
| | | else { |
| | | new_h = neth; |
| | | new_w = (w * neth) / h; |
| | | } |
| | | for (i = 0; i < n; ++i) { |
| | | box b = dets[i].bbox; |
| | | b.x = (b.x - (netw - new_w) / 2. / netw) / ((float)new_w / netw); |
| | | b.y = (b.y - (neth - new_h) / 2. / neth) / ((float)new_h / neth); |
| | | b.w *= (float)netw / new_w; |
| | | b.h *= (float)neth / new_h; |
| | | if (!relative) { |
| | | b.x *= w; |
| | | b.w *= w; |
| | | b.y *= h; |
| | | b.h *= h; |
| | | } |
| | | dets[i].bbox = b; |
| | | } |
| | | } |
| | | |
| | | void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets) |
| | | { |
| | | int i, j, n, z; |
| | | float *predictions = l.output; |
| | | if (l.batch == 2) { |
| | | float *flip = l.output + l.outputs; |
| | | for (j = 0; j < l.h; ++j) { |
| | | for (i = 0; i < l.w / 2; ++i) { |
| | | for (n = 0; n < l.n; ++n) { |
| | | for (z = 0; z < l.classes + l.coords + 1; ++z) { |
| | | int i1 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + i; |
| | | int i2 = z*l.w*l.h*l.n + n*l.w*l.h + j*l.w + (l.w - i - 1); |
| | | float swap = flip[i1]; |
| | | flip[i1] = flip[i2]; |
| | | flip[i2] = swap; |
| | | if (z == 0) { |
| | | flip[i1] = -flip[i1]; |
| | | flip[i2] = -flip[i2]; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } |
| | | for (i = 0; i < l.outputs; ++i) { |
| | | l.output[i] = (l.output[i] + flip[i]) / 2.; |
| | | } |
| | | } |
| | | for (i = 0; i < l.w*l.h; ++i) { |
| | | int row = i / l.w; |
| | | int col = i % l.w; |
| | | for (n = 0; n < l.n; ++n) { |
| | | int index = n*l.w*l.h + i; |
| | | for (j = 0; j < l.classes; ++j) { |
| | | dets[index].prob[j] = 0; |
| | | } |
| | | int obj_index = entry_index(l, 0, n*l.w*l.h + i, l.coords); |
| | | int box_index = entry_index(l, 0, n*l.w*l.h + i, 0); |
| | | int mask_index = entry_index(l, 0, n*l.w*l.h + i, 4); |
| | | float scale = l.background ? 1 : predictions[obj_index]; |
| | | dets[index].bbox = get_region_box(predictions, l.biases, n, box_index, col, row, l.w, l.h, l.w*l.h); |
| | | dets[index].objectness = scale > thresh ? scale : 0; |
| | | if (dets[index].mask) { |
| | | for (j = 0; j < l.coords - 4; ++j) { |
| | | dets[index].mask[j] = l.output[mask_index + j*l.w*l.h]; |
| | | } |
| | | } |
| | | |
| | | int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + !l.background); |
| | | if (l.softmax_tree) { |
| | | |
| | | hierarchy_predictions(predictions + class_index, l.classes, l.softmax_tree, 0, l.w*l.h); |
| | | if (map) { |
| | | for (j = 0; j < 200; ++j) { |
| | | int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + map[j]); |
| | | float prob = scale*predictions[class_index]; |
| | | dets[index].prob[j] = (prob > thresh) ? prob : 0; |
| | | } |
| | | } |
| | | else { |
| | | int j = hierarchy_top_prediction(predictions + class_index, l.softmax_tree, tree_thresh, l.w*l.h); |
| | | dets[index].prob[j] = (scale > thresh) ? scale : 0; |
| | | } |
| | | } |
| | | else { |
| | | if (dets[index].objectness) { |
| | | for (j = 0; j < l.classes; ++j) { |
| | | int class_index = entry_index(l, 0, n*l.w*l.h + i, l.coords + 1 + j); |
| | | float prob = scale*predictions[class_index]; |
| | | dets[index].prob[j] = (prob > thresh) ? prob : 0; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } |
| | | correct_region_boxes(dets, l.w*l.h*l.n, w, h, netw, neth, relative); |
| | | } |
| | |
| | | void backward_region_layer(const region_layer l, network_state state); |
| | | void get_region_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness, int *map); |
| | | void resize_region_layer(layer *l, int w, int h); |
| | | void get_region_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, float tree_thresh, int relative, detection *dets); |
| | | |
| | | #ifdef GPU |
| | | void forward_region_layer_gpu(const region_layer l, network_state state); |
| | |
| | | } |
| | | } |
| | | |
| | | int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride) |
| | | { |
| | | float p = 1; |
| | | int group = 0; |
| | | int i; |
| | | while (1) { |
| | | float max = 0; |
| | | int max_i = 0; |
| | | |
| | | for (i = 0; i < hier->group_size[group]; ++i) { |
| | | int index = i + hier->group_offset[group]; |
| | | float val = predictions[(i + hier->group_offset[group])*stride]; |
| | | if (val > max) { |
| | | max_i = index; |
| | | max = val; |
| | | } |
| | | } |
| | | if (p*max > thresh) { |
| | | p = p*max; |
| | | group = hier->child[max_i]; |
| | | if (hier->child[max_i] < 0) return max_i; |
| | | } |
| | | else if (group == 0) { |
| | | return max_i; |
| | | } |
| | | else { |
| | | return hier->parent[hier->group_offset[group]]; |
| | | } |
| | | } |
| | | return 0; |
| | | } |
| | | |
| | | tree *read_tree(char *filename) |
| | | { |
| | | tree t = {0}; |
| | |
| | | int *leaf; |
| | | int n; |
| | | int *parent; |
| | | int *child; |
| | | int *group; |
| | | char **name; |
| | | |
| | |
| | | } tree; |
| | | |
| | | tree *read_tree(char *filename); |
| | | int hierarchy_top_prediction(float *predictions, tree *hier, float thresh, int stride); |
| | | void hierarchy_predictions(float *predictions, int n, tree *hier, int only_leaves); |
| | | void change_leaves(tree *t, char *leaf_list); |
| | | float get_hierarchy_probability(float *x, tree *hier, int c); |
| New file |
| | |
| | | #include "upsample_layer.h" |
| | | #include "cuda.h" |
| | | #include "blas.h" |
| | | |
| | | #include <stdio.h> |
| | | |
| | | layer make_upsample_layer(int batch, int w, int h, int c, int stride) |
| | | { |
| | | layer l = {0}; |
| | | l.type = UPSAMPLE; |
| | | l.batch = batch; |
| | | l.w = w; |
| | | l.h = h; |
| | | l.c = c; |
| | | l.out_w = w*stride; |
| | | l.out_h = h*stride; |
| | | l.out_c = c; |
| | | if(stride < 0){ |
| | | stride = -stride; |
| | | l.reverse=1; |
| | | l.out_w = w/stride; |
| | | l.out_h = h/stride; |
| | | } |
| | | l.stride = stride; |
| | | l.outputs = l.out_w*l.out_h*l.out_c; |
| | | l.inputs = l.w*l.h*l.c; |
| | | l.delta = calloc(l.outputs*batch, sizeof(float)); |
| | | l.output = calloc(l.outputs*batch, sizeof(float));; |
| | | |
| | | l.forward = forward_upsample_layer; |
| | | l.backward = backward_upsample_layer; |
| | | #ifdef GPU |
| | | l.forward_gpu = forward_upsample_layer_gpu; |
| | | l.backward_gpu = backward_upsample_layer_gpu; |
| | | |
| | | l.delta_gpu = cuda_make_array(l.delta, l.outputs*batch); |
| | | l.output_gpu = cuda_make_array(l.output, l.outputs*batch); |
| | | #endif |
| | | if(l.reverse) fprintf(stderr, "downsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); |
| | | else fprintf(stderr, "upsample %2dx %4d x%4d x%4d -> %4d x%4d x%4d\n", stride, w, h, c, l.out_w, l.out_h, l.out_c); |
| | | return l; |
| | | } |
| | | |
| | | void resize_upsample_layer(layer *l, int w, int h) |
| | | { |
| | | l->w = w; |
| | | l->h = h; |
| | | l->out_w = w*l->stride; |
| | | l->out_h = h*l->stride; |
| | | if(l->reverse){ |
| | | l->out_w = w/l->stride; |
| | | l->out_h = h/l->stride; |
| | | } |
| | | l->outputs = l->out_w*l->out_h*l->out_c; |
| | | l->inputs = l->h*l->w*l->c; |
| | | l->delta = realloc(l->delta, l->outputs*l->batch*sizeof(float)); |
| | | l->output = realloc(l->output, l->outputs*l->batch*sizeof(float)); |
| | | |
| | | #ifdef GPU |
| | | cuda_free(l->output_gpu); |
| | | cuda_free(l->delta_gpu); |
| | | l->output_gpu = cuda_make_array(l->output, l->outputs*l->batch); |
| | | l->delta_gpu = cuda_make_array(l->delta, l->outputs*l->batch); |
| | | #endif |
| | | |
| | | } |
| | | |
| | | void forward_upsample_layer(const layer l, network_state net) |
| | | { |
| | | fill_cpu(l.outputs*l.batch, 0, l.output, 1); |
| | | if(l.reverse){ |
| | | upsample_cpu(l.output, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, net.input); |
| | | }else{ |
| | | upsample_cpu(net.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output); |
| | | } |
| | | } |
| | | |
| | | void backward_upsample_layer(const layer l, network_state state) |
| | | { |
| | | if(l.reverse){ |
| | | upsample_cpu(l.delta, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, state.delta); |
| | | }else{ |
| | | upsample_cpu(state.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta); |
| | | } |
| | | } |
| | | |
| | | #ifdef GPU |
| | | void forward_upsample_layer_gpu(const layer l, network_state state) |
| | | { |
| | | fill_ongpu(l.outputs*l.batch, 0, l.output_gpu, 1); |
| | | if(l.reverse){ |
| | | upsample_gpu(l.output_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 0, l.scale, state.input); |
| | | }else{ |
| | | upsample_gpu(state.input, l.w, l.h, l.c, l.batch, l.stride, 1, l.scale, l.output_gpu); |
| | | } |
| | | } |
| | | |
| | | void backward_upsample_layer_gpu(const layer l, network_state state) |
| | | { |
| | | if(l.reverse){ |
| | | upsample_gpu(l.delta_gpu, l.out_w, l.out_h, l.c, l.batch, l.stride, 1, l.scale, state.delta); |
| | | }else{ |
| | | upsample_gpu(state.delta, l.w, l.h, l.c, l.batch, l.stride, 0, l.scale, l.delta_gpu); |
| | | } |
| | | } |
| | | #endif |
| New file |
| | |
| | | #ifndef UPSAMPLE_LAYER_H |
| | | #define UPSAMPLE_LAYER_H |
| | | #include "cuda.h" |
| | | #include "layer.h" |
| | | #include "network.h" |
| | | |
| | | layer make_upsample_layer(int batch, int w, int h, int c, int stride); |
| | | void forward_upsample_layer(const layer l, network net); |
| | | void backward_upsample_layer(const layer l, network net); |
| | | void resize_upsample_layer(layer *l, int w, int h); |
| | | |
| | | #ifdef GPU |
| | | void forward_upsample_layer_gpu(const layer l, network net); |
| | | void backward_upsample_layer_gpu(const layer l, network net); |
| | | #endif |
| | | |
| | | #endif |
| | |
| | | return max_i; |
| | | } |
| | | |
/*
 * Linear search: returns the index of the first element of a[0..n) equal to
 * `val`, or -1 when no element matches (including when n <= 0).
 */
int int_index(int *a, int val, int n)
{
    int pos = 0;
    while (pos < n) {
        if (a[pos] == val) {
            return pos;
        }
        ++pos;
    }
    return -1;
}
| | | |
| | | int rand_int(int min, int max) |
| | | { |
| | | if (max < min){ |
| | |
| | | unsigned int random_gen(); |
| | | float random_float(); |
| | | float rand_uniform_strong(float min, float max); |
| | | int int_index(int *a, int val, int n); |
| | | |
| | | #endif |
| | | |
| New file |
| | |
| | | #include "yolo_layer.h" |
| | | #include "activations.h" |
| | | #include "blas.h" |
| | | #include "box.h" |
| | | #include "cuda.h" |
| | | #include "utils.h" |
| | | |
| | | #include <stdio.h> |
| | | #include <assert.h> |
| | | #include <string.h> |
| | | #include <stdlib.h> |
| | | |
| | | layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes) |
| | | { |
| | | int i; |
| | | layer l = {0}; |
| | | l.type = YOLO; |
| | | |
| | | l.n = n; |
| | | l.total = total; |
| | | l.batch = batch; |
| | | l.h = h; |
| | | l.w = w; |
| | | l.c = n*(classes + 4 + 1); |
| | | l.out_w = l.w; |
| | | l.out_h = l.h; |
| | | l.out_c = l.c; |
| | | l.classes = classes; |
| | | l.cost = calloc(1, sizeof(float)); |
| | | l.biases = calloc(total*2, sizeof(float)); |
| | | if(mask) l.mask = mask; |
| | | else{ |
| | | l.mask = calloc(n, sizeof(int)); |
| | | for(i = 0; i < n; ++i){ |
| | | l.mask[i] = i; |
| | | } |
| | | } |
| | | l.bias_updates = calloc(n*2, sizeof(float)); |
| | | l.outputs = h*w*n*(classes + 4 + 1); |
| | | l.inputs = l.outputs; |
| | | l.truths = 90*(4 + 1); |
| | | l.delta = calloc(batch*l.outputs, sizeof(float)); |
| | | l.output = calloc(batch*l.outputs, sizeof(float)); |
| | | for(i = 0; i < total*2; ++i){ |
| | | l.biases[i] = .5; |
| | | } |
| | | |
| | | l.forward = forward_yolo_layer; |
| | | l.backward = backward_yolo_layer; |
| | | #ifdef GPU |
| | | l.forward_gpu = forward_yolo_layer_gpu; |
| | | l.backward_gpu = backward_yolo_layer_gpu; |
| | | l.output_gpu = cuda_make_array(l.output, batch*l.outputs); |
| | | l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs); |
| | | #endif |
| | | |
| | | fprintf(stderr, "detection\n"); |
| | | srand(0); |
| | | |
| | | return l; |
| | | } |
| | | |
| | | void resize_yolo_layer(layer *l, int w, int h) |
| | | { |
| | | l->w = w; |
| | | l->h = h; |
| | | |
| | | l->outputs = h*w*l->n*(l->classes + 4 + 1); |
| | | l->inputs = l->outputs; |
| | | |
| | | l->output = realloc(l->output, l->batch*l->outputs*sizeof(float)); |
| | | l->delta = realloc(l->delta, l->batch*l->outputs*sizeof(float)); |
| | | |
| | | #ifdef GPU |
| | | cuda_free(l->delta_gpu); |
| | | cuda_free(l->output_gpu); |
| | | |
| | | l->delta_gpu = cuda_make_array(l->delta, l->batch*l->outputs); |
| | | l->output_gpu = cuda_make_array(l->output, l->batch*l->outputs); |
| | | #endif |
| | | } |
| | | |
| | | /* |
| | |  * Decode one predicted box from the raw layer values. |
| | |  * |
| | |  * x      - value buffer; the four box entries are read at index, |
| | |  *          index+stride, index+2*stride and index+3*stride |
| | |  * biases - anchor sizes stored as (width, height) pairs; pair n is used |
| | |  * i, j   - cell column and row, added to the x/y entries |
| | |  * lw, lh - divisors for the centre coordinates |
| | |  * w, h   - divisors for the anchor-scaled width and height |
| | |  * |
| | |  * Returns the decoded box. |
| | |  */ |
| | | box get_yolo_box(float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, int stride) |
| | | { |
| | |     const float *entry = x + index; |
| | |     const float *anchor = biases + 2*n; |
| | |     box decoded; |
| | | |
| | |     decoded.x = (i + entry[0]) / lw; |
| | |     decoded.y = (j + entry[stride]) / lh; |
| | |     decoded.w = exp(entry[2*stride]) * anchor[0] / w; |
| | |     decoded.h = exp(entry[3*stride]) * anchor[1] / h; |
| | | |
| | |     return decoded; |
| | | } |
| | | |
| | | /* |
| | |  * Write the box-regression deltas for one prediction against a truth box. |
| | |  * |
| | |  * The four targets are the truth centre relative to cell (i, j) in grid |
| | |  * units, and the log of the truth size (scaled by w/h) over anchor n. |
| | |  * Each delta entry is scale * (target - current value), written at |
| | |  * index + k*stride for k = 0..3. |
| | |  * |
| | |  * Returns box_iou of the currently decoded prediction and the truth box. |
| | |  */ |
| | | float delta_yolo_box(box truth, float *x, float *biases, int n, int index, int i, int j, int lw, int lh, int w, int h, float *delta, float scale, int stride) |
| | | { |
| | |     float target[4]; |
| | |     int k; |
| | | |
| | |     box pred = get_yolo_box(x, biases, n, index, i, j, lw, lh, w, h, stride); |
| | |     float overlap = box_iou(pred, truth); |
| | | |
| | |     target[0] = (truth.x*lw - i); |
| | |     target[1] = (truth.y*lh - j); |
| | |     target[2] = log(truth.w*w / biases[2*n]); |
| | |     target[3] = log(truth.h*h / biases[2*n + 1]); |
| | | |
| | |     for (k = 0; k < 4; ++k) { |
| | |         const int at = index + k*stride; |
| | |         delta[at] = scale * (target[k] - x[at]); |
| | |     } |
| | | |
| | |     return overlap; |
| | | } |
| | | |
| | | |
| | | /* |
| | |  * Fill the class deltas for one prediction. |
| | |  * |
| | |  * If delta[index] is already non-zero, only the entry of the given class is |
| | |  * overwritten with (1 - output). Otherwise every class entry is set to |
| | |  * (target - output), with target 1 for the given class and 0 for the others. |
| | |  * Class c lives at index + stride*c. |
| | |  * |
| | |  * When avg_cat is non-NULL, the output of the given class is added to it. |
| | |  */ |
| | | void delta_yolo_class(float *output, float *delta, int index, int class, int classes, int stride, float *avg_cat) |
| | | { |
| | |     int c; |
| | | |
| | |     if (delta[index] != 0) { |
| | |         const int hit = index + stride*class; |
| | |         delta[hit] = 1 - output[hit]; |
| | |         if (avg_cat) *avg_cat += output[hit]; |
| | |         return; |
| | |     } |
| | | |
| | |     for (c = 0; c < classes; ++c) { |
| | |         const int at = index + stride*c; |
| | |         const int is_truth = (c == class); |
| | |         delta[at] = (is_truth ? 1 : 0) - output[at]; |
| | |         if (is_truth && avg_cat) *avg_cat += output[at]; |
| | |     } |
| | | } |
| | | |
| | | /* |
| | |  * Flat offset of one value inside l.output / l.delta. |
| | |  * |
| | |  * location encodes anchor*(l.w*l.h) + cell; entry selects one of the |
| | |  * (4 + classes + 1) planes stored per anchor. |
| | |  */ |
| | | static int entry_index(layer l, int batch, int location, int entry) |
| | | { |
| | |     const int cells = l.w*l.h; |
| | |     const int anchor = location / cells; |
| | |     const int cell = location % cells; |
| | | |
| | |     return batch*l.outputs + anchor*cells*(4+l.classes+1) + entry*cells + cell; |
| | | } |
| | | |
| | | /* CPU forward pass of the YOLO layer. |
| | |  * Copies state.input into l.output, applies LOGISTIC (non-GPU builds only) to entries 0-1 and 4.. of every anchor, |
| | |  * clears l.delta and, when state.train is set, fills l.delta from state.truth, stores the cost in *l.cost |
| | |  * and prints per-call statistics. |
| | |  * NOTE(review): count and class_count stay 0 when no truth box selects an anchor in l.mask, so the printed |
| | |  * averages divide by zero - confirm this is acceptable. |
| | |  * NOTE(review): i = truth.x * l.w reaches l.w if truth.x is exactly 1 (same for j) - confirm truth is < 1. |
| | |  */ void forward_yolo_layer(const layer l, network_state state) |
| | | { |
| | | int i,j,b,t,n; |
| | | memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float)); |
| | | /* In GPU builds the activation below is compiled out; forward_yolo_layer_gpu applies it on the device first. */ |
| | | #ifndef GPU |
| | | for (b = 0; b < l.batch; ++b){ |
| | | for(n = 0; n < l.n; ++n){ |
| | | int index = entry_index(l, b, n*l.w*l.h, 0); |
| | | activate_array(l.output + index, 2*l.w*l.h, LOGISTIC); /* entries 0 and 1 (box x, y) */ |
| | | index = entry_index(l, b, n*l.w*l.h, 4); |
| | | activate_array(l.output + index, (1+l.classes)*l.w*l.h, LOGISTIC); /* entry 4 (objectness) and the class entries */ |
| | | } |
| | | } |
| | | #endif |
| | | /* Everything past the early return below only runs when training. */ |
| | | memset(l.delta, 0, l.outputs * l.batch * sizeof(float)); |
| | | if(!state.train) return; |
| | | float avg_iou = 0; |
| | | float recall = 0; |
| | | float recall75 = 0; |
| | | float avg_cat = 0; |
| | | float avg_obj = 0; |
| | | float avg_anyobj = 0; |
| | | int count = 0; |
| | | int class_count = 0; |
| | | *(l.cost) = 0; |
| | | for (b = 0; b < l.batch; ++b) { |
| | | /* Pass 1: for every cell and anchor, set the objectness delta from the best IoU against any truth box. */ |
| | | for (j = 0; j < l.h; ++j) { |
| | | for (i = 0; i < l.w; ++i) { |
| | | for (n = 0; n < l.n; ++n) { |
| | | int box_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 0); |
| | | box pred = get_yolo_box(l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.w*l.h); |
| | | float best_iou = 0; |
| | | int best_t = 0; |
| | | for(t = 0; t < l.max_boxes; ++t){ |
| | | box truth = float_to_box(state.truth + t*(4 + 1) + b*l.truths, 1); |
| | | if(!truth.x) break; /* stop at the first truth entry whose x is 0 */ |
| | | float iou = box_iou(pred, truth); |
| | | if (iou > best_iou) { |
| | | best_iou = iou; |
| | | best_t = t; |
| | | } |
| | | } |
| | | int obj_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4); |
| | | avg_anyobj += l.output[obj_index]; |
| | | l.delta[obj_index] = 0 - l.output[obj_index]; /* default: push objectness towards 0 */ |
| | | if (best_iou > l.ignore_thresh) { |
| | | l.delta[obj_index] = 0; /* overlap above ignore_thresh: no objectness gradient */ |
| | | } |
| | | if (best_iou > l.truth_thresh) { |
| | | l.delta[obj_index] = 1 - l.output[obj_index]; |
| | | /* overlap above truth_thresh: also write class and box deltas for the best-matching truth */ |
| | | int class = state.truth[best_t*(4 + 1) + b*l.truths + 4]; |
| | | if (l.map) class = l.map[class]; |
| | | int class_index = entry_index(l, b, n*l.w*l.h + j*l.w + i, 4 + 1); |
| | | delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, 0); |
| | | box truth = float_to_box(state.truth + best_t*(4 + 1) + b*l.truths, 1); |
| | | delta_yolo_box(truth, l.output, l.biases, l.mask[n], box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | /* Pass 2: for every truth box, pick the anchor whose shape overlaps it best and write its deltas. */ |
| | | for(t = 0; t < l.max_boxes; ++t){ |
| | | box truth = float_to_box(state.truth + t*(4 + 1) + b*l.truths, 1); |
| | | |
| | | if(!truth.x) break; |
| | | float best_iou = 0; |
| | | int best_n = 0; |
| | | i = (truth.x * l.w); /* cell column containing the truth centre */ |
| | | j = (truth.y * l.h); /* cell row containing the truth centre */ |
| | | box truth_shift = truth; |
| | | truth_shift.x = truth_shift.y = 0; /* compare shapes only: both boxes are placed at the origin */ |
| | | for(n = 0; n < l.total; ++n){ |
| | | box pred = {0}; |
| | | pred.w = l.biases[2*n]/ state.net.w; |
| | | pred.h = l.biases[2*n+1]/ state.net.h; |
| | | float iou = box_iou(pred, truth_shift); |
| | | if (iou > best_iou){ |
| | | best_iou = iou; |
| | | best_n = n; |
| | | } |
| | | } |
| | | /* Deltas are written only if best_n is found in l.mask; int_index presumably returns a negative value otherwise - confirm. */ |
| | | int mask_n = int_index(l.mask, best_n, l.n); |
| | | if(mask_n >= 0){ |
| | | int box_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 0); |
| | | float iou = delta_yolo_box(truth, l.output, l.biases, best_n, box_index, i, j, l.w, l.h, state.net.w, state.net.h, l.delta, (2-truth.w*truth.h), l.w*l.h); |
| | | |
| | | int obj_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4); |
| | | avg_obj += l.output[obj_index]; |
| | | l.delta[obj_index] = 1 - l.output[obj_index]; |
| | | |
| | | int class = state.truth[t*(4 + 1) + b*l.truths + 4]; |
| | | if (l.map) class = l.map[class]; |
| | | int class_index = entry_index(l, b, mask_n*l.w*l.h + j*l.w + i, 4 + 1); |
| | | delta_yolo_class(l.output, l.delta, class_index, class, l.classes, l.w*l.h, &avg_cat); |
| | | |
| | | ++count; |
| | | ++class_count; |
| | | if(iou > .5) recall += 1; |
| | | if(iou > .75) recall75 += 1; |
| | | avg_iou += iou; |
| | | } |
| | | } |
| | | } |
| | | *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); |
| | | printf("Region %d Avg IOU: %f, Class: %f, Obj: %f, No Obj: %f, .5R: %f, .75R: %f, count: %d\n", state.index, avg_iou/count, avg_cat/class_count, avg_obj/count, avg_anyobj/(l.w*l.h*l.n*l.batch), recall/count, recall75/count, count); |
| | | } |
| | | |
| | | /* |
| | |  * CPU backward pass: passes l.delta (batch*inputs values) to axpy_cpu with a |
| | |  * factor of 1 and state.delta as the destination argument. |
| | |  */ |
| | | void backward_yolo_layer(const layer l, network_state state) |
| | | { |
| | |     const int total = l.batch*l.inputs; |
| | | |
| | |     axpy_cpu(total, 1, l.delta, 1, state.delta, 1); |
| | | } |
| | | |
| | | /* |
| | |  * Map n detection boxes from network-input coordinates back to the image. |
| | |  * |
| | |  * w, h       - image size |
| | |  * netw, neth - network input size |
| | |  * relative   - when zero, the boxes are additionally scaled by w and h |
| | |  * letter     - when non-zero, the image is assumed to occupy an |
| | |  *              aspect-preserving new_w x new_h region centred in the |
| | |  *              network input; otherwise it fills the whole input |
| | |  * |
| | |  * The boxes in dets are updated in place. |
| | |  */ |
| | | void correct_yolo_boxes(detection *dets, int n, int w, int h, int netw, int neth, int relative, int letter) |
| | | { |
| | |     int new_w = netw; |
| | |     int new_h = neth; |
| | |     int k; |
| | | |
| | |     if (letter) { |
| | |         /* Shrink whichever side does not touch the network border. */ |
| | |         if (((float)netw / w) < ((float)neth / h)) new_h = (h * netw) / w; |
| | |         else new_w = (w * neth) / h; |
| | |     } |
| | | |
| | |     for (k = 0; k < n; ++k) { |
| | |         box *bb = &dets[k].bbox; |
| | | |
| | |         bb->x = (bb->x - (netw - new_w)/2./netw) / ((float)new_w/netw); |
| | |         bb->y = (bb->y - (neth - new_h)/2./neth) / ((float)new_h/neth); |
| | |         bb->w *= (float)netw/new_w; |
| | |         bb->h *= (float)neth/new_h; |
| | | |
| | |         if (relative) continue; |
| | | |
| | |         bb->x *= w; |
| | |         bb->w *= w; |
| | |         bb->y *= h; |
| | |         bb->h *= h; |
| | |     } |
| | | } |
| | | |
| | | /* |
| | |  * Count the (cell, anchor) pairs of batch item 0 whose objectness value |
| | |  * (entry 4) is strictly greater than thresh. |
| | |  */ |
| | | int yolo_num_detections(layer l, float thresh) |
| | | { |
| | |     const int cells = l.w*l.h; |
| | |     int hits = 0; |
| | |     int anchor, cell; |
| | | |
| | |     for (anchor = 0; anchor < l.n; ++anchor) { |
| | |         for (cell = 0; cell < cells; ++cell) { |
| | |             const int obj_index = entry_index(l, 0, anchor*cells + cell, 4); |
| | |             if (l.output[obj_index] > thresh) hits += 1; |
| | |         } |
| | |     } |
| | | |
| | |     return hits; |
| | | } |
| | | |
| | | /* |
| | |  * Average batch item 0 with a horizontally mirrored batch item 1. |
| | |  * |
| | |  * The second item (starting at l.output + l.outputs) is mirrored in place |
| | |  * column by column, the values touched with z == 0 are negated, and the |
| | |  * result is averaged element-wise into the first item. |
| | |  * |
| | |  * NOTE(review): planes are addressed here as z*(w*h*n) + a*(w*h), while |
| | |  * entry_index in this file uses a*(w*h*(classes+5)) + z*(w*h). Every plane |
| | |  * is still mirrored exactly once, but the z == 0 negation may not land on |
| | |  * the x-offset planes - confirm the intended layout. |
| | |  */ |
| | | void avg_flipped_yolo(layer l) |
| | | { |
| | |     const int cells = l.w*l.h; |
| | |     float *flip = l.output + l.outputs; |
| | |     int row, col, a, z, k; |
| | | |
| | |     for (row = 0; row < l.h; ++row) { |
| | |         for (col = 0; col < l.w/2; ++col) { |
| | |             for (a = 0; a < l.n; ++a) { |
| | |                 for (z = 0; z < l.classes + 4 + 1; ++z) { |
| | |                     const int line = z*cells*l.n + a*cells + row*l.w; |
| | |                     float *left = flip + line + col; |
| | |                     float *right = flip + line + (l.w - col - 1); |
| | |                     const float held = *left; |
| | | |
| | |                     *left = *right; |
| | |                     *right = held; |
| | |                     if (z == 0) { |
| | |                         *left = -*left; |
| | |                         *right = -*right; |
| | |                     } |
| | |                 } |
| | |             } |
| | |         } |
| | |     } |
| | | |
| | |     for (k = 0; k < l.outputs; ++k) { |
| | |         l.output[k] = (l.output[k] + flip[k])/2.; |
| | |     } |
| | | } |
| | | |
| | | /* |
| | |  * Collect detections of batch item 0 into dets. |
| | |  * |
| | |  * Every (cell, anchor) pair whose objectness is above thresh yields one |
| | |  * detection: its decoded box, its objectness, and per-class probabilities |
| | |  * objectness*class_value, zeroed when not above thresh. When l.batch == 2 |
| | |  * the output is first averaged with its flipped copy. The boxes are then |
| | |  * passed through correct_yolo_boxes. |
| | |  * |
| | |  * dets must have room for every detection found (yolo_num_detections uses |
| | |  * the same threshold test). map is not used here. |
| | |  * |
| | |  * Returns the number of detections written. |
| | |  */ |
| | | int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter) |
| | | { |
| | |     const int cells = l.w*l.h; |
| | |     float *values = l.output; |
| | |     int found = 0; |
| | |     int cell, a, c; |
| | | |
| | |     if (l.batch == 2) avg_flipped_yolo(l); |
| | | |
| | |     for (cell = 0; cell < cells; ++cell) { |
| | |         const int row = cell / l.w; |
| | |         const int col = cell % l.w; |
| | | |
| | |         for (a = 0; a < l.n; ++a) { |
| | |             const int location = a*cells + cell; |
| | |             const float objectness = values[entry_index(l, 0, location, 4)]; |
| | |             detection *det; |
| | |             int box_index; |
| | | |
| | |             if (objectness <= thresh) continue; |
| | | |
| | |             det = &dets[found]; |
| | |             box_index = entry_index(l, 0, location, 0); |
| | |             det->bbox = get_yolo_box(values, l.biases, l.mask[a], box_index, col, row, l.w, l.h, netw, neth, cells); |
| | |             det->objectness = objectness; |
| | |             det->classes = l.classes; |
| | | |
| | |             for (c = 0; c < l.classes; ++c) { |
| | |                 const float prob = objectness*values[entry_index(l, 0, location, 4 + 1 + c)]; |
| | |                 det->prob[c] = (prob > thresh) ? prob : 0; |
| | |             } |
| | | |
| | |             ++found; |
| | |         } |
| | |     } |
| | | |
| | |     correct_yolo_boxes(dets, found, w, h, netw, neth, relative, letter); |
| | |     return found; |
| | | } |
| | | |
| | | #ifdef GPU |
| | | |
| | | /* |
| | |  * GPU forward pass of the YOLO layer. |
| | |  * |
| | |  * Copies state.input into l.output_gpu on the device and applies LOGISTIC to |
| | |  * entries 0-1 and 4.. of every anchor. For inference (or l.onlyforward) the |
| | |  * result is pulled into l.output and the function returns. |
| | |  * |
| | |  * For training, the loss is computed by the CPU routine forward_yolo_layer. |
| | |  * state.input is a device buffer here (it is the source of copy_ongpu), so |
| | |  * it must not be handed to the CPU routine or used as the host destination |
| | |  * of cuda_pull_array. The activated output is staged in a temporary host |
| | |  * buffer and passed to the CPU routine through a copy of state instead. |
| | |  * The resulting l.delta is then pushed to l.delta_gpu. |
| | |  * |
| | |  * NOTE(review): state.truth is assumed to be device-resident like |
| | |  * state.input and is staged the same way - confirm against the training loop. |
| | |  */ |
| | | void forward_yolo_layer_gpu(const layer l, network_state state) |
| | | { |
| | |     copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1); |
| | |     int b, n; |
| | |     for (b = 0; b < l.batch; ++b){ |
| | |         for(n = 0; n < l.n; ++n){ |
| | |             int index = entry_index(l, b, n*l.w*l.h, 0); |
| | |             activate_array_ongpu(l.output_gpu + index, 2*l.w*l.h, LOGISTIC); |
| | |             index = entry_index(l, b, n*l.w*l.h, 4); |
| | |             activate_array_ongpu(l.output_gpu + index, (1+l.classes)*l.w*l.h, LOGISTIC); |
| | |         } |
| | |     } |
| | |     if(!state.train || l.onlyforward){ |
| | |         cuda_pull_array(l.output_gpu, l.output, l.batch*l.outputs); |
| | |         return; |
| | |     } |
| | | |
| | |     /* Host staging buffers for the CPU loss computation. */ |
| | |     float *in_cpu = calloc(l.batch*l.inputs, sizeof(float)); |
| | |     float *truth_cpu = 0; |
| | |     if(!in_cpu){ |
| | |         fprintf(stderr, "forward_yolo_layer_gpu: out of memory\n"); |
| | |         exit(EXIT_FAILURE); |
| | |     } |
| | |     if(state.truth){ |
| | |         int num_truth = l.batch*l.truths; |
| | |         truth_cpu = calloc(num_truth, sizeof(float)); |
| | |         if(!truth_cpu){ |
| | |             fprintf(stderr, "forward_yolo_layer_gpu: out of memory\n"); |
| | |             exit(EXIT_FAILURE); |
| | |         } |
| | |         cuda_pull_array(state.truth, truth_cpu, num_truth); |
| | |     } |
| | |     cuda_pull_array(l.output_gpu, in_cpu, l.batch*l.inputs); |
| | | |
| | |     /* Same state, but with host pointers for the buffers the CPU code reads. */ |
| | |     network_state cpu_state = state; |
| | |     cpu_state.input = in_cpu; |
| | |     cpu_state.truth = truth_cpu; |
| | | |
| | |     forward_yolo_layer(l, cpu_state); |
| | |     cuda_push_array(l.delta_gpu, l.delta, l.batch*l.outputs); |
| | | |
| | |     free(in_cpu); |
| | |     free(truth_cpu); |
| | | } |
| | | |
| | | /* |
| | |  * GPU backward pass: passes l.delta_gpu (batch*inputs values) to axpy_ongpu |
| | |  * with a factor of 1 and state.delta as the destination argument. |
| | |  */ |
| | | void backward_yolo_layer_gpu(const layer l, network_state state) |
| | | { |
| | |     const int total = l.batch*l.inputs; |
| | | |
| | |     axpy_ongpu(total, 1, l.delta_gpu, 1, state.delta, 1); |
| | | } |
| | | #endif |
| | | |
| New file |
| | |
| | | #ifndef YOLO_LAYER_H |
| | | #define YOLO_LAYER_H |
| | | |
| | | //#include "darknet.h" |
| | | #include "layer.h" |
| | | #include "network.h" |
| | | |
| | | /* Public interface of the YOLO detection layer. */ |
| | | |
| | | /* Create a YOLO layer. */ |
| | | layer make_yolo_layer(int batch, int w, int h, int n, int total, int *mask, int classes); |
| | | /* CPU forward/backward passes; the signatures match the definitions, which take a network_state. */ |
| | | void forward_yolo_layer(const layer l, network_state state); |
| | | void backward_yolo_layer(const layer l, network_state state); |
| | | /* Reallocate the layer buffers for a new w x h grid. */ |
| | | void resize_yolo_layer(layer *l, int w, int h); |
| | | /* Count predictions of batch item 0 whose objectness exceeds thresh. */ |
| | | int yolo_num_detections(layer l, float thresh); |
| | | /* Fill dets with the predictions above thresh and return how many were written. */ |
| | | int get_yolo_detections(layer l, int w, int h, int netw, int neth, float thresh, int *map, int relative, detection *dets, int letter); |
| | | |
| | | #ifdef GPU |
| | | /* GPU forward/backward passes; same parameter types as the definitions. */ |
| | | void forward_yolo_layer_gpu(const layer l, network_state state); |
| | | void backward_yolo_layer_gpu(const layer l, network_state state); |
| | | #endif |
| | | |
| | | #endif |
| New file |
| | |
| | | |
| | | |
| | | ./darknet detector demo ./cfg/coco.data ./cfg/yolov3.cfg ./yolov3.weights test50.mp4 -i 0 -thresh 0.25 |
| | | |
| | | |
| | | |