| | |
| | | int gpu_index; |
| | | #ifdef GPU |
| | | #include <stdio.h> |
| | | #include <stdlib.h> |
| | |
| | | { |
| | | cl_info info; |
| | | info.initialized = 0; |
| | | if(index < 0) error("Won't initialize negative gpu id\n"); |
| | | cl_uint num_platforms, num_devices; |
| | | // Fetch the Platform and Device IDs; we only want one. |
| | | cl_device_id devices[MAX_DEVICES]; |
| | | info.error=clGetPlatformIDs(1, &info.platform, &num_platforms); |
| | | check_error(info); |
| | | |
| | | info.error=clGetDeviceIDs(info.platform, CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &num_devices); |
| | | check_error(info); |
| | | |
| | | index = index%num_devices; |
| | | info.device = devices[index]; |
| | | check_error(info); |
| | | |
| | | cl_context_properties properties[]={ |
| | | CL_CONTEXT_PLATFORM, (cl_context_properties)info.platform, 0}; |
| | | |
| | | // Note that nVidia's OpenCL requires the platform property |
| | | info.context=clCreateContext(properties, 1, &info.device, 0, 0, &info.error); |
| | | check_error(info); |
| | | |
| | | info.queue = clCreateCommandQueue(info.context, info.device, 0, &info.error); |
| | | check_error(info); |
| | | #ifdef CLBLAS |
| | | info.error = clblasSetup(); |
| | | #endif |
| | | check_error(info); |
| | | info.initialized = 1; |
| | | |
| | | #ifdef VERBOSE |
| | | printf("=== %d OpenCL platform(s) found: ===\n", num_platforms); |
| | | char buffer[10240]; |
| | | clGetPlatformInfo(info.platform, CL_PLATFORM_PROFILE, 10240, buffer, NULL); |
| | |
| | | printf(" VENDOR = %s\n", buffer); |
| | | clGetPlatformInfo(info.platform, CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL); |
| | | printf(" EXTENSIONS = %s\n", buffer); |
| | | |
| | | check_error(info); |
| | | info.error=clGetDeviceIDs(info.platform, CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &num_devices); |
| | | |
| | | if(num_devices > MAX_DEVICES) num_devices = MAX_DEVICES; |
| | | printf("=== %d OpenCL device(s) found on platform:\n", num_devices); |
| | | int i; |
| | | for (i=0; i<num_devices; i++) |
| | | { |
| | | for (i=0; i<num_devices; i++){ |
| | | char buffer[10240]; |
| | | cl_uint buf_uint; |
| | | cl_ulong buf_ulong; |
| | |
| | | size_t workitem_size[10]; |
| | | clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, 10*sizeof(workitem_size), workitem_size, NULL); |
| | | printf(" DEVICE_MAX_WORK_ITEM_SIZES = %u / %u / %u \n", (unsigned int)workitem_size[0], (unsigned int)workitem_size[1], (unsigned int)workitem_size[2]); |
| | | printf("%d devices, %d index\n", num_devices, index); |
| | | |
| | | } |
| | | index = index%num_devices; |
| | | printf("%d rand, %d devices, %d index\n", getpid(), num_devices, index); |
| | | info.device = devices[index]; |
| | | fprintf(stderr, "Found %d device(s)\n", num_devices); |
| | | check_error(info); |
| | | |
| | | cl_context_properties properties[]={ |
| | | CL_CONTEXT_PLATFORM, (cl_context_properties)info.platform, |
| | | 0}; |
| | | // Note that nVidia's OpenCL requires the platform property |
| | | info.context=clCreateContext(properties, 1, &info.device, 0, 0, &info.error); |
| | | check_error(info); |
| | | info.queue = clCreateCommandQueue(info.context, info.device, 0, &info.error); |
| | | check_error(info); |
| | | #ifdef CLBLAS |
| | | info.error = clblasSetup(); |
| | | #endif |
| | | check_error(info); |
| | | info.initialized = 1; |
| | | return info; |
| | | } |
| | | |
| | |
| | | return prog; |
| | | } |
| | | |
// One-time setup guard for the `cl` object (declared outside this view):
// when cl.initialized is zero, prints "initializing" and stores the value
// returned by cl_init() into `cl`; when it is non-zero, nothing happens.
// NOTE(review): two signature lines and two cl_init() calls (one passing
// `index`, one passing `gpu_index`) appear back to back below. This looks
// like the old and new sides of a diff merged together; confirm which pair
// is the live code before relying on this text.
| | | void cl_setup(int index) |
| | | void cl_setup() |
| | | { |
| | | if(!cl.initialized){ |
| | | printf("initializing\n"); |
| | | cl = cl_init(index); |
| | | cl = cl_init(gpu_index); |
| | | } |
| | | } |
| | | |
| | |
| | | |
| | | cl_mem cl_make_array(float *x, int n) |
| | | { |
| | | if(gpu_index < 0) return 0; |
| | | cl_mem mem = clCreateBuffer(cl.context, |
| | | CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, |
| | | sizeof(float)*n, x, &cl.error); |