From 1c0fd9bb4726f28b5ccf4491b8d108b00c884ec3 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Thu, 30 Oct 2014 06:26:41 +0000
Subject: [PATCH] im2col slightly faster
---
src/axpy.cl | 8 ++++----
1 files changed, 4 insertions(+), 4 deletions(-)
diff --git a/src/axpy.cl b/src/axpy.cl
index 394d897..901a826 100644
--- a/src/axpy.cl
+++ b/src/axpy.cl
@@ -1,7 +1,7 @@
-__kernel void axpy(int N, float ALPHA, __global float *X, int INCX, __global float *Y, int INCY)
+__kernel void axpy(int N, float ALPHA, __global float *X, int OFFX, int INCX, __global float *Y, int OFFY, int INCY)
{
int i = get_global_id(0);
- Y[i*INCY] += ALPHA*X[i*INCX];
+ Y[OFFY+i*INCY] += ALPHA*X[OFFX+i*INCX];
}
__kernel void scal(int N, float ALPHA, __global float *X, int INCX)
@@ -10,9 +10,9 @@
X[i*INCX] *= ALPHA;
}
-__kernel void copy(int N, __global float *X, int INCX, __global float *Y, int INCY)
+__kernel void copy(int N, __global float *X, int OFFX, int INCX, __global float *Y, int OFFY, int INCY)
{
int i = get_global_id(0);
- Y[i*INCY] = X[i*INCX];
+ Y[i*INCY + OFFY] = X[i*INCX + OFFX];
}
--
Gitblit v1.10.0