From 390a0cf923cee683e5be300390c736a2ab9b7fd5 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Sat, 11 Apr 2015 08:24:07 +0000
Subject: [PATCH] not much changed...

---
 src/crop_layer_kernels.cu |   46 ++++++++++++++++++++++++++++++++++++++++------
 1 files changed, 40 insertions(+), 6 deletions(-)

diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu
index 8c97f35..ca6bb20 100644
--- a/src/crop_layer_kernels.cu
+++ b/src/crop_layer_kernels.cu
@@ -1,15 +1,41 @@
 extern "C" {
 #include "crop_layer.h"
+#include "utils.h"
 #include "cuda.h"
+#include "image.h"
 }
 
 #define BLOCK 256
 
-__global__ void forward_crop_layer_kernel(float *input, int size, int c, int h, int w, int crop_height, int crop_width, int dh, int dw, int flip, float *output)
+__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
+{
+    // Read channel c at column x, row y (x varies fastest, then y, then c); outside [0,w) x [0,h) gives 0.
+    return (x >= 0 && x < w && y >= 0 && y < h) ? image[x + w*(y + c*h)] : 0;
+}
+
+__device__ float billinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c)
+{
+    // Integer corner at or below (x, y), and the fractional offsets measured from it.
+    int x0 = (int) floorf(x), y0 = (int) floorf(y);
+    float fx = x - x0, fy = y - y0;
+
+    // Sample the four surrounding pixels of channel c through get_pixel_kernel.
+    float p00 = get_pixel_kernel(image, w, h, x0,   y0,   c);
+    float p01 = get_pixel_kernel(image, w, h, x0,   y0+1, c);
+    float p10 = get_pixel_kernel(image, w, h, x0+1, y0,   c);
+    float p11 = get_pixel_kernel(image, w, h, x0+1, y0+1, c);
+
+    return (1-fy)*(1-fx)*p00 + fy*(1-fx)*p01 + (1-fy)*fx*p10 + fy*fx*p11;
+}
+
+__global__ void forward_crop_layer_kernel(float *input, int size, int c, int h, int w, int crop_height, int crop_width, int dh, int dw, int flip, float angle, float *output)
 {
     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
     if(id >= size) return;
 
+    float cx = w/2.;
+    float cy = h/2.;
+
     int count = id;
     int j = id % crop_width;
     id /= crop_width;
@@ -18,10 +44,16 @@
     int k = id % c;
     id /= c;
     int b = id;
-    int col = (flip) ? w - dw - j - 1 : j + dw;    
-    int row = i + dh;
-    int index = col+w*(row+h*(k + c*b)); 
-    output[count] = input[index];
+
+    input += w*h*c*b;
+
+    int x = (flip) ? w - dw - j - 1 : j + dw;    
+    int y = i + dh;
+
+    float rx = cos(angle)*(x-cx) - sin(angle)*(y-cy) + cx;
+    float ry = sin(angle)*(x-cx) + cos(angle)*(y-cy) + cy;
+
+    output[count] = billinear_interpolate_kernel(input, w, h, rx, ry, k);
 }
 
 extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
@@ -29,7 +61,9 @@
     int flip = (layer.flip && rand()%2);
     int dh = rand()%(layer.h - layer.crop_height + 1);
     int dw = rand()%(layer.w - layer.crop_width + 1);
+    float angle = rand_uniform() - .5;
     if(!state.train){
+        angle = 0;
         flip = 0;
         dh = (layer.h - layer.crop_height)/2;
         dw = (layer.w - layer.crop_width)/2;
@@ -40,7 +74,7 @@
     dim3 dimGrid((size-1)/BLOCK + 1, 1, 1);
 
     forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.c, layer.h, layer.w,
-                        layer.crop_height, layer.crop_width, dh, dw, flip, layer.output_gpu);
+                        layer.crop_height, layer.crop_width, dh, dw, flip, angle, layer.output_gpu);
     check_error(cudaPeekAtLastError());
 }
 

--
Gitblit v1.10.0