Joseph Redmon
2014-12-16 884045091b3a22d4dda3a9d743d076367c840ef7
src/opencl.c
@@ -1,3 +1,4 @@
int gpu_index;
#ifdef GPU
#include <stdio.h>
#include <stdlib.h>
@@ -31,11 +32,36 @@
{
    cl_info info;
    info.initialized = 0;
    if(index < 0) error("Won't initialize negative gpu id\n");
    cl_uint num_platforms, num_devices;
    // Fetch the Platform and Device IDs; we only want one.
    cl_device_id devices[MAX_DEVICES];
    info.error=clGetPlatformIDs(1, &info.platform, &num_platforms);
    check_error(info);
    info.error=clGetDeviceIDs(info.platform, CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &num_devices);
    check_error(info);
    index = index%num_devices;
    info.device = devices[index];
    check_error(info);
    cl_context_properties properties[]={
        CL_CONTEXT_PLATFORM, (cl_context_properties)info.platform, 0};
    // Note that nVidia's OpenCL requires the platform property
    info.context=clCreateContext(properties, 1, &info.device, 0, 0, &info.error);
    check_error(info);
    info.queue = clCreateCommandQueue(info.context, info.device, 0, &info.error);
    check_error(info);
#ifdef CLBLAS
    info.error = clblasSetup();
#endif
    check_error(info);
    info.initialized = 1;
#ifdef VERBOSE
    printf("=== %d OpenCL platform(s) found: ===\n", num_platforms);
    char buffer[10240];
    clGetPlatformInfo(info.platform, CL_PLATFORM_PROFILE, 10240, buffer, NULL);
@@ -48,14 +74,12 @@
    printf("  VENDOR = %s\n", buffer);
    clGetPlatformInfo(info.platform, CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
    printf("  EXTENSIONS = %s\n", buffer);
    check_error(info);
    info.error=clGetDeviceIDs(info.platform, CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &num_devices);
    if(num_devices > MAX_DEVICES) num_devices = MAX_DEVICES;
    printf("=== %d OpenCL device(s) found on platform:\n", num_devices);
    int i;
    for (i=0; i<num_devices; i++)
    {
    for (i=0; i<num_devices; i++){
        char buffer[10240];
        cl_uint buf_uint;
        cl_ulong buf_ulong;
@@ -85,27 +109,10 @@
        size_t workitem_size[10];
        clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, 10*sizeof(workitem_size), workitem_size, NULL);
        printf("  DEVICE_MAX_WORK_ITEM_SIZES = %u / %u / %u \n", (unsigned int)workitem_size[0], (unsigned int)workitem_size[1], (unsigned int)workitem_size[2]);
        printf("%d devices, %d index\n", num_devices, index);
    }
    index = index%num_devices;
    printf("%d rand, %d devices, %d index\n", getpid(), num_devices, index);
    info.device = devices[index];
    fprintf(stderr, "Found %d device(s)\n", num_devices);
    check_error(info);
    cl_context_properties properties[]={
       CL_CONTEXT_PLATFORM, (cl_context_properties)info.platform,
       0};
    // Note that nVidia's OpenCL requires the platform property
    info.context=clCreateContext(properties, 1, &info.device, 0, 0, &info.error);
    check_error(info);
    info.queue = clCreateCommandQueue(info.context, info.device, 0, &info.error);
    check_error(info);
    #ifdef CLBLAS
    info.error = clblasSetup();
    #endif
    check_error(info);
    info.initialized = 1;
    return info;
}
@@ -134,11 +141,11 @@
   return prog;
}
// Sets up the global `cl` state on first use: when cl.initialized is zero,
// prints "initializing" and assigns the result of cl_init() to `cl`;
// when it is already non-zero, nothing happens.
//
// NOTE(review): this span holds two signatures and two cl_init() calls back
// to back. That looks like the before/after lines of a patch whose +/- markers
// were stripped -- presumably the `int index` parameter was dropped in favour
// of the file-level `gpu_index`. It will not compile as written; confirm which
// variant is intended before building.
void cl_setup(int index)
void cl_setup()
{
   if(!cl.initialized){
        printf("initializing\n");
      // Variant that takes the device index from the caller's argument.
      cl = cl_init(index);
        // Variant that takes the device index from the global gpu_index.
        cl = cl_init(gpu_index);
   }
}
@@ -191,6 +198,7 @@
cl_mem cl_make_array(float *x, int n)
{
    if(gpu_index < 0) return 0;
    cl_mem mem = clCreateBuffer(cl.context,
            CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
            sizeof(float)*n, x, &cl.error);