From 68213b835b9f15cb449ad2037a8b51c17a3de07b Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Mon, 14 Mar 2016 22:10:14 +0000
Subject: [PATCH] Makefile
---
src/crop_layer_kernels.cu | 217 +++++++++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 198 insertions(+), 19 deletions(-)
diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu
index 628c700..8a08630 100644
--- a/src/crop_layer_kernels.cu
+++ b/src/crop_layer_kernels.cu
@@ -1,15 +1,149 @@
+#include "cuda_runtime.h"
+#include "curand.h"
+#include "cublas_v2.h"
+
extern "C" {
#include "crop_layer.h"
+#include "utils.h"
#include "cuda.h"
+#include "image.h"
}
-#define BLOCK 256
+/*
+ * Read one sample of channel c at column x, row y from an image stored as
+ * consecutive w*h planes (index = x + w*(y + c*h)).
+ * Coordinates outside [0, w) x [0, h) read as 0.
+ */
+__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
+{
+    const bool inside = (x >= 0) && (x < w) && (y >= 0) && (y < h);
+    if(!inside) return 0;
+    const int row = y + c*h;
+    return image[x + w*row];
+}
-__global__ void forward_crop_layer_kernel(float *input, int size, int c, int h, int w, int crop_height, int crop_width, int dh, int dw, int flip, float *output)
+/*
+ * Convert an RGB triple to HSV.
+ *
+ * Returns (h, s, v) where v = max(r, g, b), s = (max - min)/max and the
+ * hue h is measured in sextants, i.e. it lies in [0, 6).
+ * Black (max == 0) is reported as h = -1, s = 0.
+ *
+ * Two degenerate inputs are handled explicitly:
+ *  - grey (max == min, max != 0): the hue formula would evaluate 0/0 and
+ *    produce NaN, so h = 0, s = 0 is returned instead;
+ *  - a very small negative hue can round to exactly 6 after the "+ 6" wrap
+ *    in single precision; it is folded back to 0 so the result never
+ *    leaves [0, 6).
+ */
+__device__ float3 rgb_to_hsv_kernel(float3 rgb)
+{
+    float r = rgb.x;
+    float g = rgb.y;
+    float b = rgb.z;
+
+    float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b);
+    float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b);
+    float delta = max - min;
+
+    float h, s;
+    float v = max;
+    if(max == 0){
+        s = 0;
+        h = -1;
+    } else if(delta == 0){
+        // Grey pixel: hue is undefined, avoid the 0/0 below.
+        s = 0;
+        h = 0;
+    } else {
+        s = delta/max;
+        if(r == max){
+            h = (g - b) / delta;
+        } else if (g == max) {
+            h = 2 + (b - r) / delta;
+        } else {
+            h = 4 + (r - g) / delta;
+        }
+        if (h < 0) h += 6;
+        // -epsilon + 6 can round up to 6.0f; 6 is the same hue as 0.
+        if (h >= 6) h = 0;
+    }
+    return make_float3(h, s, v);
+}
+
+/*
+ * Convert an HSV triple back to RGB, clamping each channel to [0, 1].
+ *
+ * hsv.x is the hue in sextants: floor(h) selects one of six colour
+ * sectors and the fractional part interpolates inside it. hsv.y is the
+ * saturation and hsv.z the value. With s == 0 the hue is ignored and the
+ * result is the grey (v, v, v).
+ *
+ * The sector index is wrapped modulo 6. Previously any index outside 0..4
+ * was treated as sector 5, so a hue of exactly 6 (equivalent to 0) came
+ * out as (v, p, v) instead of (v, p, p). Results for h in [-1, 6) are
+ * unchanged.
+ */
+__device__ float3 hsv_to_rgb_kernel(float3 hsv)
+{
+    float h = hsv.x;
+    float s = hsv.y;
+    float v = hsv.z;
+
+    float r, g, b;
+
+    if (s == 0) {
+        r = g = b = v;
+    } else {
+        int index = (int) floorf(h);
+        float f = h - index;
+        float p = v*(1-s);
+        float q = v*(1-s*f);
+        float t = v*(1-s*(1-f));
+
+        // Wrap into 0..5; C's % keeps the sign of the dividend.
+        index %= 6;
+        if(index < 0) index += 6;
+
+        switch(index){
+            case 0:  r = v; g = t; b = p; break;
+            case 1:  r = q; g = v; b = p; break;
+            case 2:  r = p; g = v; b = t; break;
+            case 3:  r = p; g = q; b = v; break;
+            case 4:  r = t; g = p; b = v; break;
+            default: r = v; g = p; b = q; break;
+        }
+    }
+    r = (r < 0) ? 0 : ((r > 1) ? 1 : r);
+    g = (g < 0) ? 0 : ((g > 1) ? 1 : g);
+    b = (b < 0) ? 0 : ((b > 1) ? 1 : b);
+    return make_float3(r, g, b);
+}
+
+/*
+ * Sample channel c of the image at the fractional position (x, y) by
+ * blending the four surrounding integer-coordinate samples with bilinear
+ * weights. Samples are fetched through get_pixel_kernel, so taps that
+ * fall outside the image contribute 0.
+ */
+__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c)
+{
+    int x0 = (int) floorf(x);
+    int y0 = (int) floorf(y);
+
+    // Fractional distance from (x0, y0), each in [0, 1).
+    float wx = x - x0;
+    float wy = y - y0;
+
+    float p00 = get_pixel_kernel(image, w, h, x0, y0, c);
+    float p01 = get_pixel_kernel(image, w, h, x0, y0+1, c);
+    float p10 = get_pixel_kernel(image, w, h, x0+1, y0, c);
+    float p11 = get_pixel_kernel(image, w, h, x0+1, y0+1, c);
+
+    return (1-wy) * (1-wx) * p00 +
+        wy * (1-wx) * p01 +
+        (1-wy) * wx * p10 +
+        wy * wx * p11;
+}
+
+/*
+ * In-place colour adjustment and value remapping for a batch of images
+ * stored as 3 consecutive planes of w*h floats each. One thread handles
+ * one pixel position (all three channels) of one image.
+ *
+ * image       batch of 3-plane images, modified in place
+ * rand        8 floats per image; slots 0..3 of an image's record drive
+ *             saturation and exposure, while rand[0..2] are used by every
+ *             image as the per-channel shift values
+ * batch,w,h   batch size and image dimensions
+ * train       non-zero: scale saturation and value in HSV space and apply
+ *             the channel shift; zero: only scale/translate is applied
+ * saturation, exposure
+ *             upper bounds of the random factors; the factor is
+ *             r*(bound - 1) + 1 and is inverted when the paired random
+ *             value exceeds .5
+ * translate, scale
+ *             final remap: out = value*scale + translate (+ shift term)
+ * shift       amplitude of the per-channel offset (rand - .5)*shift
+ *
+ * Threads whose flat index is >= batch*w*h return immediately, so the grid
+ * only has to cover that many threads.
+ *
+ * Changes versus the previous version:
+ *  - the image offset is widened to size_t before multiplying (it used to
+ *    be multiplied in int and could overflow);
+ *  - literals are single precision, so the arithmetic is no longer
+ *    promoted to double (outputs may differ in the last bit);
+ *  - the per-image random values are only loaded when training, the only
+ *    case in which they are used.
+ */
+__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift)
+{
+    int size = batch * w * h;
+    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
+    if(id >= size) return;
+    int x = id % w;
+    id /= w;
+    int y = id % h;
+    id /= h;    // id is now the index of the image inside the batch
+
+    float rshift = rand[0];
+    float gshift = rand[1];
+    float bshift = rand[2];
+
+    // Widen first: id*h*w*3 in int overflows for large batches/images.
+    size_t plane = (size_t)w * h;
+    size_t pixel = (size_t)w * y + x;
+    image += (size_t)id * plane * 3;
+
+    float3 rgb = make_float3(image[pixel], image[pixel + plane], image[pixel + 2*plane]);
+    if(train){
+        float r0 = rand[8*id + 0];
+        float r1 = rand[8*id + 1];
+        float r2 = rand[8*id + 2];
+        float r3 = rand[8*id + 3];
+
+        saturation = r0*(saturation - 1) + 1;
+        saturation = (r1 > .5f) ? 1.f/saturation : saturation;
+        exposure = r2*(exposure - 1) + 1;
+        exposure = (r3 > .5f) ? 1.f/exposure : exposure;
+
+        float3 hsv = rgb_to_hsv_kernel(rgb);
+        hsv.y *= saturation;
+        hsv.z *= exposure;
+        rgb = hsv_to_rgb_kernel(hsv);
+    } else {
+        shift = 0;
+    }
+    image[pixel]           = rgb.x*scale + translate + (rshift - .5f)*shift;
+    image[pixel + plane]   = rgb.y*scale + translate + (gshift - .5f)*shift;
+    image[pixel + 2*plane] = rgb.z*scale + translate + (bshift - .5f)*shift;
+}
+
+__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output) // one thread per output element; size is the number of output elements
{
int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
if(id >= size) return;
+ float cx = w/2.; // centre of the input image, used below as the rotation pivot
+ float cy = h/2.;
+
int count = id;
int j = id % crop_width;
id /= crop_width;
@@ -18,29 +152,74 @@
int k = id % c;
id /= c;
int b = id;
- int col = (flip) ? w - dw - j - 1 : j + dw;
- int row = i + dh;
- int index = col+w*(row+h*(k + c*b));
- output[count] = input[index];
+
+ float r4 = rand[8*b + 4]; // slots 4..7 of image b's 8-float record drive the crop offset, flip and rotation
+ float r5 = rand[8*b + 5];
+ float r6 = rand[8*b + 6];
+ float r7 = rand[8*b + 7];
+
+ float dw = (w - crop_width)*r4; // horizontal crop offset; lies in [0, w - crop_width] if r4 is in [0,1] - TODO confirm range of cuda_random
+ float dh = (h - crop_height)*r5; // vertical crop offset, same assumption for r5
+ flip = (flip && (r6 > .5)); // mirror only when flipping is enabled and r6 exceeds .5
+ angle = 2*angle*r7 - angle; // maps r7 linearly onto [-angle, +angle] (again assuming r7 in [0,1])
+ if(!train){ // not training: fixed centre crop, no flip, no rotation
+ dw = (w - crop_width)/2.;
+ dh = (h - crop_height)/2.;
+ flip = 0;
+ angle = 0;
+ }
+
+ input += w*h*c*b; // advance to image b of the batch; NOTE(review): the product is evaluated in int
+
+ float x = (flip) ? w - dw - j - 1 : j + dw; // source column, mirrored when flipping
+ float y = i + dh; // source row; i is derived in lines elided between the two hunks of this patch
+
+ float rx = cos(angle)*(x-cx) - sin(angle)*(y-cy) + cx; // rotate (x, y) about (cx, cy) by angle radians
+ float ry = sin(angle)*(x-cx) + cos(angle)*(y-cy) + cy;
+
+ output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k); // bilinear sample of channel k; taps outside the image read as 0
}
-extern "C" void forward_crop_layer_gpu(crop_layer layer, int train, float *input)
+extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state) // GPU forward pass: adjusts state.input in place, then crops/flips/rotates it into layer.output_gpu
{
- int flip = (layer.flip && rand()%2);
- int dh = rand()%(layer.h - layer.crop_height + 1);
- int dw = rand()%(layer.w - layer.crop_width + 1);
- if(!train){
- flip = 0;
- dh = (layer.h - layer.crop_height)/2;
- dw = (layer.w - layer.crop_width)/2;
+ cuda_random(layer.rand_gpu, layer.batch*8); // presumably refills rand_gpu with batch*8 random floats (8 per image) - helper not visible here, confirm
+
+ float radians = layer.angle*3.14159265/180.; // layer.angle is converted from degrees to radians for the kernel
+
+ float scale = 2; // default remap: value*2 - 1
+ float translate = -1;
+ if(layer.noadjust){ // noadjust leaves values as they are (scale 1, translate 0)
+ scale = 1;
+ translate = 0;
}
- int size = layer.batch*layer.c*layer.crop_width*layer.crop_height;
- dim3 dimBlock(BLOCK, 1, 1);
- dim3 dimGrid((size-1)/BLOCK + 1, 1, 1);
+ int size = layer.batch * layer.w * layer.h; // one levels thread per input pixel position; each thread handles 3 channels
- forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(input, size, layer.c, layer.h, layer.w,
- layer.crop_height, layer.crop_width, dh, dw, flip, layer.output_gpu);
+ levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.rand_gpu, layer.batch, layer.w, layer.h, state.train, layer.saturation, layer.exposure, translate, scale, layer.shift); // NOTE(review): the kernel indexes exactly 3 planes per image; layer.c is not checked here
check_error(cudaPeekAtLastError());
+
+ size = layer.batch*layer.c*layer.out_w*layer.out_h; // one crop thread per output element
+
+ forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, state.train, layer.flip, radians, layer.output_gpu); // out_h/out_w are passed as the kernel's crop_height/crop_width
+ check_error(cudaPeekAtLastError());
+
+/*
+ cuda_pull_array(layer.output_gpu, layer.output, size);
+ image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch));
+ image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch));
+ image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch));
+
+ translate_image(im, -translate);
+ scale_image(im, 1/scale);
+ translate_image(im2, -translate);
+ scale_image(im2, 1/scale);
+ translate_image(im3, -translate);
+ scale_image(im3, 1/scale);
+
+ show_image(im, "cropped");
+ show_image(im2, "cropped2");
+ show_image(im3, "cropped3");
+ cvWaitKey(0);
+ */
}
--
Gitblit v1.10.0