From 43552b6d20be48d14508eb050d6def6b8f283217 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Fri, 17 Jul 2015 20:18:05 +0000
Subject: [PATCH] nothing changed go away
---
src/blas_kernels.cu | 9 +++++----
1 files changed, 5 insertions(+), 4 deletions(-)
diff --git a/src/blas_kernels.cu b/src/blas_kernels.cu
index 2155801..0c89c47 100644
--- a/src/blas_kernels.cu
+++ b/src/blas_kernels.cu
@@ -1,6 +1,7 @@
extern "C" {
#include "blas.h"
#include "cuda.h"
+#include "utils.h"
}
__global__ void axpy_kernel(int N, float ALPHA, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
@@ -27,10 +28,10 @@
if(i < N) X[i*INCX] *= ALPHA;
}
-__global__ void mask_kernel(int n, float *x, float *mask)
+__global__ void mask_kernel(int n, float *x, float mask_num, float *mask)
{
int i = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
- if(i < n && mask[i] == 0) x[i] = 0;
+ if(i < n && mask[i] == mask_num) x[i] = mask_num;
}
__global__ void copy_kernel(int N, float *X, int OFFX, int INCX, float *Y, int OFFY, int INCY)
@@ -79,9 +80,9 @@
check_error(cudaPeekAtLastError());
}
-extern "C" void mask_ongpu(int N, float * X, float * mask)
+extern "C" void mask_ongpu(int N, float * X, float mask_num, float * mask)
{
- mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask);
+ mask_kernel<<<cuda_gridsize(N), BLOCK>>>(N, X, mask_num, mask);
check_error(cudaPeekAtLastError());
}
--
Gitblit v1.10.0