From 884045091b3a22d4dda3a9d743d076367c840ef7 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Tue, 16 Dec 2014 23:34:10 +0000
Subject: [PATCH] lots of cleaning
---
src/opencl.c | 114 ++++++++++++++++++++++++++++++--------------------------
1 files changed, 61 insertions(+), 53 deletions(-)
diff --git a/src/opencl.c b/src/opencl.c
index 994b8d6..2a543bc 100644
--- a/src/opencl.c
+++ b/src/opencl.c
@@ -1,3 +1,4 @@
+int gpu_index;
#ifdef GPU
#include <stdio.h>
#include <stdlib.h>
@@ -31,11 +32,36 @@
{
cl_info info;
info.initialized = 0;
+ if(index < 0) error("Won't initialize negative gpu id\n");
cl_uint num_platforms, num_devices;
// Fetch the Platform and Device IDs; we only want one.
cl_device_id devices[MAX_DEVICES];
info.error=clGetPlatformIDs(1, &info.platform, &num_platforms);
+ check_error(info);
+ info.error=clGetDeviceIDs(info.platform, CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &num_devices);
+ check_error(info);
+
+ index = index%num_devices;
+ info.device = devices[index];
+ check_error(info);
+
+ cl_context_properties properties[]={
+ CL_CONTEXT_PLATFORM, (cl_context_properties)info.platform, 0};
+
+ // Note that nVidia's OpenCL requires the platform property
+ info.context=clCreateContext(properties, 1, &info.device, 0, 0, &info.error);
+ check_error(info);
+
+ info.queue = clCreateCommandQueue(info.context, info.device, 0, &info.error);
+ check_error(info);
+#ifdef CLBLAS
+ info.error = clblasSetup();
+#endif
+ check_error(info);
+ info.initialized = 1;
+
+#ifdef VERBOSE
printf("=== %d OpenCL platform(s) found: ===\n", num_platforms);
char buffer[10240];
clGetPlatformInfo(info.platform, CL_PLATFORM_PROFILE, 10240, buffer, NULL);
@@ -48,14 +74,12 @@
printf(" VENDOR = %s\n", buffer);
clGetPlatformInfo(info.platform, CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
printf(" EXTENSIONS = %s\n", buffer);
-
check_error(info);
- info.error=clGetDeviceIDs(info.platform, CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &num_devices);
+
if(num_devices > MAX_DEVICES) num_devices = MAX_DEVICES;
printf("=== %d OpenCL device(s) found on platform:\n", num_devices);
int i;
- for (i=0; i<num_devices; i++)
- {
+ for (i=0; i<num_devices; i++){
char buffer[10240];
cl_uint buf_uint;
cl_ulong buf_ulong;
@@ -80,74 +104,57 @@
printf(" DEVICE_MAX_WORK_GROUP_SIZE = %llu\n", (unsigned long long)buf_ulong);
cl_uint items;
clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint),
- &items, NULL);
+ &items, NULL);
printf(" DEVICE_MAX_WORK_ITEM_DIMENSIONS = %u\n", (unsigned int)items);
size_t workitem_size[10];
clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, 10*sizeof(workitem_size), workitem_size, NULL);
printf(" DEVICE_MAX_WORK_ITEM_SIZES = %u / %u / %u \n", (unsigned int)workitem_size[0], (unsigned int)workitem_size[1], (unsigned int)workitem_size[2]);
+ printf("%d devices, %d index\n", num_devices, index);
}
- index = index%num_devices;
- printf("%d rand, %d devices, %d index\n", getpid(), num_devices, index);
- info.device = devices[index];
- fprintf(stderr, "Found %d device(s)\n", num_devices);
- check_error(info);
-
- cl_context_properties properties[]={
- CL_CONTEXT_PLATFORM, (cl_context_properties)info.platform,
- 0};
- // Note that nVidia's OpenCL requires the platform property
- info.context=clCreateContext(properties, 1, &info.device, 0, 0, &info.error);
- check_error(info);
- info.queue = clCreateCommandQueue(info.context, info.device, 0, &info.error);
- check_error(info);
- #ifdef CLBLAS
- info.error = clblasSetup();
- #endif
- check_error(info);
- info.initialized = 1;
+#endif
return info;
}
cl_program cl_fprog(char *filename, char *options, cl_info info)
{
- size_t srcsize;
- char src[64*1024];
- memset(src, 0, 64*1024);
- FILE *fil=fopen(filename,"r");
+ size_t srcsize;
+ char src[64*1024];
+ memset(src, 0, 64*1024);
+ FILE *fil=fopen(filename,"r");
if(fil == 0) file_error(filename);
- srcsize=fread(src, sizeof src, 1, fil);
- fclose(fil);
- const char *srcptr[]={src};
- // Submit the source code of the example kernel to OpenCL
- cl_program prog=clCreateProgramWithSource(info.context,1, srcptr, &srcsize, &info.error);
- check_error(info);
- char build_c[1024*64];
- // and compile it (after this we could extract the compiled version)
- info.error=clBuildProgram(prog, 0, 0, options, 0, 0);
- if ( info.error != CL_SUCCESS ) {
- fprintf(stderr, "Error Building Program: %d\n", info.error);
- clGetProgramBuildInfo( prog, info.device, CL_PROGRAM_BUILD_LOG, 1024*64, build_c, 0);
- fprintf(stderr, "Build Log for %s program:\n%s\n", filename, build_c);
- }
- check_error(info);
- return prog;
+ srcsize=fread(src, sizeof src, 1, fil);
+ fclose(fil);
+ const char *srcptr[]={src};
+ // Submit the source code of the example kernel to OpenCL
+ cl_program prog=clCreateProgramWithSource(info.context,1, srcptr, &srcsize, &info.error);
+ check_error(info);
+ char build_c[1024*64];
+ // and compile it (after this we could extract the compiled version)
+ info.error=clBuildProgram(prog, 0, 0, options, 0, 0);
+ if ( info.error != CL_SUCCESS ) {
+ fprintf(stderr, "Error Building Program: %d\n", info.error);
+ clGetProgramBuildInfo( prog, info.device, CL_PROGRAM_BUILD_LOG, 1024*64, build_c, 0);
+ fprintf(stderr, "Build Log for %s program:\n%s\n", filename, build_c);
+ }
+ check_error(info);
+ return prog;
}
-void cl_setup(int index)
+void cl_setup()
{
- if(!cl.initialized){
+ if(!cl.initialized){
printf("initializing\n");
- cl = cl_init(index);
- }
+ cl = cl_init(gpu_index);
+ }
}
cl_kernel get_kernel(char *filename, char *kernelname, char *options)
{
- cl_program prog = cl_fprog(filename, options, cl);
- cl_kernel kernel=clCreateKernel(prog, kernelname, &cl.error);
- check_error(cl);
- return kernel;
+ cl_program prog = cl_fprog(filename, options, cl);
+ cl_kernel kernel=clCreateKernel(prog, kernelname, &cl.error);
+ check_error(cl);
+ return kernel;
}
void cl_read_array(cl_mem mem, float *x, int n)
@@ -158,7 +165,7 @@
float cl_checksum(cl_mem mem, int n)
{
-
+
float *x = calloc(n, sizeof(float));
cl_read_array(mem, x, n);
float sum = sum_array(x, n);
@@ -191,6 +198,7 @@
cl_mem cl_make_array(float *x, int n)
{
+ if(gpu_index < 0) return 0;
cl_mem mem = clCreateBuffer(cl.context,
CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
sizeof(float)*n, x, &cl.error);
--
Gitblit v1.10.0