Joseph Redmon
2015-01-20 4ac78c89269138b4623993f9f1d81829d8e88131
src/gemm.c
@@ -164,8 +164,7 @@
/*
 * Compile-time parameters for the gemm_nn_fast kernel. Each one is
 * stringified with STR() and forwarded to the kernel build as a -D option
 * by get_gemm_nn_fast_kernel().
 */
#define TILE 64
#define TILE_K 16
#define WPT 8
/*
 * Single definition of THREADS. For the values above it equals
 * (TILE*TILE)/(WPT*WPT); it is kept as a plain literal so the stringified
 * build option is a bare number. Update it together with TILE and WPT.
 */
#define THREADS 64
cl_kernel get_gemm_nn_fast_kernel()
{
@@ -175,7 +174,6 @@
        gemm_kernel = get_kernel("src/gemm_fast.cl", "gemm_nn_fast", "-D TILE=" STR(TILE)
                                                                    " -cl-nv-verbose "
                                                                    " -D TILE_K=" STR(TILE_K)
                                                                    " -D WPT=" STR(WPT)
                                                                    " -D THREADS=" STR(THREADS));
        init = 1;
    }
@@ -464,7 +462,6 @@
    test_gpu_accuracy(0,0,128,128,128); 
/*
    time_ongpu(0,0,64,2916,363); 
    time_ongpu_fast(0,0,64,2916,363); 
    time_ongpu(0,0,64,2916,363); 
@@ -483,7 +480,6 @@
    time_ongpu_fast(0,0,128,4096,12544); 
    time_ongpu(0,0,128,4096,4096); 
    time_ongpu_fast(0,0,128,4096,4096); 
    */
//    time_ongpu(1,0,2304,196,256); 
//    time_ongpu_fast(1,0,2304,196,256); 
//    time_ongpu(0,1,256,2304,196);