| | |
| | | |
/*
 * Compile-time tiling parameters for the "gemm_nn_fast" OpenCL kernel.
 * get_gemm_nn_fast_kernel() forwards each of them to the kernel build as a
 * -D option via STR(), so the values here are what the device code sees.
 */
#define TILE 64
#define TILE_K 16
#define WPT 8

/*
 * THREADS is defined exactly once.  Defining it a second time with a
 * different replacement list is an incompatible macro redefinition, and an
 * unparenthesised quotient such as (TILE*TILE)/(WPT*WPT) can mis-associate
 * when the macro is used inside a larger expression.  A plain literal is
 * kept so the forwarded -D THREADS value stays a simple number; the check
 * below keeps it in step with TILE and WPT.
 */
#define THREADS 64

#if THREADS != ((TILE * TILE) / (WPT * WPT))
#error "THREADS must equal (TILE*TILE)/(WPT*WPT)"
#endif
| | | |
| | | cl_kernel get_gemm_nn_fast_kernel() |
| | | { |
| | |
| | | gemm_kernel = get_kernel("src/gemm_fast.cl", "gemm_nn_fast", "-D TILE=" STR(TILE) |
| | | " -cl-nv-verbose " |
| | | " -D TILE_K=" STR(TILE_K) |
| | | " -D WPT=" STR(WPT) |
| | | " -D THREADS=" STR(THREADS)); |
| | | init = 1; |
| | | } |
| | |
| | | |
| | | test_gpu_accuracy(0,0,128,128,128); |
| | | |
| | | /* |
| | | time_ongpu(0,0,64,2916,363); |
| | | time_ongpu_fast(0,0,64,2916,363); |
| | | time_ongpu(0,0,64,2916,363); |
| | |
| | | time_ongpu_fast(0,0,128,4096,12544); |
| | | time_ongpu(0,0,128,4096,4096); |
| | | time_ongpu_fast(0,0,128,4096,4096); |
| | | */ |
| | | // time_ongpu(1,0,2304,196,256); |
| | | // time_ongpu_fast(1,0,2304,196,256); |
| | | // time_ongpu(0,1,256,2304,196); |