~speedprog/mtg/mtg_card_detector.git

			@@ -1,3 +1,7 @@
			#include "cuda_runtime.h"
			#include "curand.h"
			#include "cublas_v2.h"

			extern "C" {
			#include "crop_layer.h"
			#include "utils.h"
			@@ -5,8 +9,6 @@
			#include "image.h"
			}

			#define BLOCK 256

			__device__ float get_pixel_kernel(float *image, int w, int h, int x, int y, int c)
			{
			if(x < 0 || x >= w || y < 0 || y >= h) return 0;
			@@ -78,7 +80,7 @@
			return make_float3(r, g, b);
			}

			__device__ float billinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c)
			__device__ float bilinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c)
			{
			int ix = (int) floorf(x);
			int iy = (int) floorf(y);
			@@ -93,7 +95,7 @@
			return val;
			}

			__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale)
			__global__ void levels_image_kernel(float *image, float *rand, int batch, int w, int h, int train, float saturation, float exposure, float translate, float scale, float shift)
			{
			int size = batch * w * h;
			int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
			@@ -102,6 +104,9 @@
			id /= w;
			int y = id % h;
			id /= h;
			float rshift = rand[0];
			float gshift = rand[1];
			float bshift = rand[2];
			float r0 = rand[8*id + 0];
			float r1 = rand[8*id + 1];
			float r2 = rand[8*id + 2];
			@@ -114,19 +119,21 @@

			size_t offset = id * h * w * 3;
			image += offset;
			float r = image[x + w*(y + h*2)];
			float r = image[x + w*(y + h*0)];
			float g = image[x + w*(y + h*1)];
			float b = image[x + w*(y + h*0)];
			float b = image[x + w*(y + h*2)];
			float3 rgb = make_float3(r,g,b);
			if(train){
			float3 hsv = rgb_to_hsv_kernel(rgb);
			hsv.y *= saturation;
			hsv.z *= exposure;
			rgb = hsv_to_rgb_kernel(hsv);
			} else {
			shift = 0;
			}
			image[x + w*(y + h*2)] = rgb.x*scale + translate;
			image[x + w*(y + h*1)] = rgb.y*scale + translate;
			image[x + w*(y + h*0)] = rgb.z*scale + translate;
			image[x + w*(y + h*0)] = rgb.x*scale + translate + (rshift - .5)*shift;
			image[x + w*(y + h*1)] = rgb.y*scale + translate + (gshift - .5)*shift;
			image[x + w*(y + h*2)] = rgb.z*scale + translate + (bshift - .5)*shift;
			}

			__global__ void forward_crop_layer_kernel(float *input, float *rand, int size, int c, int h, int w, int crop_height, int crop_width, int train, int flip, float angle, float *output)
			@@ -170,7 +177,7 @@
			float rx = cos(angle)*(x-cx) - sin(angle)*(y-cy) + cx;
			float ry = sin(angle)*(x-cx) + cos(angle)*(y-cy) + cy;

			output[count] = billinear_interpolate_kernel(input, w, h, rx, ry, k);
			output[count] = bilinear_interpolate_kernel(input, w, h, rx, ry, k);
			}

			extern "C" void forward_crop_layer_gpu(crop_layer layer, network_state state)
			@@ -181,15 +188,19 @@

			float scale = 2;
			float translate = -1;
			if(layer.noadjust){
			scale = 1;
			translate = 0;
			}

			int size = layer.batch * layer.w * layer.h;

			levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.rand_gpu, layer.batch, layer.w, layer.h, state.train, layer.saturation, layer.exposure, translate, scale);
			levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.rand_gpu, layer.batch, layer.w, layer.h, state.train, layer.saturation, layer.exposure, translate, scale, layer.shift);
			check_error(cudaPeekAtLastError());

			size = layer.batch*layer.c*layer.crop_width*layer.crop_height;
			size = layer.batch*layer.c*layer.out_w*layer.out_h;

			forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.crop_height, layer.crop_width, state.train, layer.flip, radians, layer.output_gpu);
			forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.rand_gpu, size, layer.c, layer.h, layer.w, layer.out_h, layer.out_w, state.train, layer.flip, radians, layer.output_gpu);
			check_error(cudaPeekAtLastError());

			/*