1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
| extern "C" {
| #include "crop_layer.h"
| #include "cuda.h"
| }
|
/* Threads per block: passed as the block dimension of the kernel launch in this file. */
| #define BLOCK 256
|
/*
 * Copy a crop_height x crop_width window out of every channel of every image
 * in `input` into `output`, one output element per thread.
 *
 * The linear output index is decomposed as ((b*c + k)*crop_height + i)*crop_width + j
 * and the source element is read from input[col + w*(row + h*(k + c*b))].
 *
 *   input        source buffer, indexed as described above
 *   size         number of output elements; threads at or beyond it do nothing
 *   c, h, w      channel count, height and width used to index `input`
 *   crop_height, crop_width
 *                dimensions of the window written to `output`
 *   dh, dw       row / column offset of the window inside the source image
 *   flip         non-zero mirrors the window horizontally
 *   output       destination buffer, written at the thread's linear index
 *
 * The thread index is built from blockIdx.x, blockIdx.y and threadIdx.x only,
 * so the launch is expected to use one-dimensional blocks.
 */
__global__ void forward_crop_layer_kernel(float *input, int size, int c, int h, int w, int crop_height, int crop_width, int dh, int dw, int flip, float *output)
{
    /* Flatten the (x, y) grid of 1-D blocks into a single linear index. */
    const int out_index = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;

    if (out_index < size) {
        /* Peel the coordinates off the linear index, fastest-varying first. */
        const int x = out_index % crop_width;
        int rest = out_index / crop_width;
        const int y = rest % crop_height;
        rest /= crop_height;
        const int channel = rest % c;
        const int batch = rest / c;

        /* When flipping, read columns of the window from right to left. */
        int src_x;
        if (flip) {
            src_x = w - dw - x - 1;
        } else {
            src_x = x + dw;
        }
        const int src_y = y + dh;

        output[out_index] = input[src_x + w*(src_y + h*(channel + c*batch))];
    }
}
|
/*
 * Run the crop layer forward pass on the GPU.
 *
 * Picks a random horizontal flip (only when layer.flip is set) and a random
 * window offset, then launches forward_crop_layer_kernel to copy the window
 * from `input` into layer.output_gpu.
 *
 *   layer   crop layer description; reads batch, c, h, w, crop_height,
 *           crop_width, flip and output_gpu
 *   input   source buffer handed to the kernel (presumably device memory --
 *           confirm against the caller)
 *
 * Uses rand(), so results depend on the global C random state.
 */
extern "C" void forward_crop_layer_gpu(crop_layer layer, float *input)
{
    /* Short-circuit: rand() is consumed for the flip only when flipping is enabled. */
    int flip = (layer.flip && rand()%2);

    /*
     * Slack between the input and the crop window in each dimension.
     * Offsets are drawn from [0, slack). When the slack is not positive
     * (crop as large as the input), rand()%slack would divide by zero, so the
     * offset falls back to 0.
     * NOTE(review): a crop larger than the input is not rejected here.
     */
    int h_slack = layer.h - layer.crop_height;
    int w_slack = layer.w - layer.crop_width;
    int dh = (h_slack > 0) ? rand()%h_slack : 0;
    int dw = (w_slack > 0) ? rand()%w_slack : 0;

    /* One kernel thread per output element. */
    int size = layer.batch*layer.c*layer.crop_width*layer.crop_height;

    /* Grid dimensions come from cuda_gridsize(), a helper declared outside this file. */
    forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(input, size, layer.c, layer.h, layer.w,
            layer.crop_height, layer.crop_width, dh, dw, flip, layer.output_gpu);

    /* Kernel launches return no status; hand the pending error state to check_error. */
    check_error(cudaPeekAtLastError());
}
|
|