From 1edcf73a73d2007afc61289245763f5cf0c29e10 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Thu, 04 Dec 2014 07:20:29 +0000
Subject: [PATCH] Detection good, split up col images

---
 src/opencl.c |  108 ++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 files changed, 97 insertions(+), 11 deletions(-)

diff --git a/src/opencl.c b/src/opencl.c
index 8f9edd3..981067a 100644
--- a/src/opencl.c
+++ b/src/opencl.c
@@ -1,18 +1,26 @@
 #ifdef GPU
-#include "opencl.h"
 #include <stdio.h>
 #include <stdlib.h>
 #include <string.h>
 #include <time.h>
 #include <unistd.h>
 
+#ifdef CLBLAS
+#include <clBLAS.h>
+#endif
+
+#include "opencl.h"
+#include "utils.h"
+#include "activations.h"
 
 cl_info cl = {0};
 
 void check_error(cl_info info)
 {
+   // Aborts with the error code if info.error is not CL_SUCCESS. (Disabled: clFinish(cl.queue);)
     if (info.error != CL_SUCCESS) {
         printf("\n Error number %d", info.error);
+        abort(); // abort() does not return, so the exit(1) below is never reached
         exit(1);
     }
 }
@@ -27,13 +35,62 @@
     // Fetch the Platform and Device IDs; we only want one.
     cl_device_id devices[MAX_DEVICES];
     info.error=clGetPlatformIDs(1, &info.platform, &num_platforms);
+
+    printf("=== %d OpenCL platform(s) found: ===\n", num_platforms);
+    char buffer[10240];
+    clGetPlatformInfo(info.platform, CL_PLATFORM_PROFILE, 10240, buffer, NULL);
+    printf("  PROFILE = %s\n", buffer);
+    clGetPlatformInfo(info.platform, CL_PLATFORM_VERSION, 10240, buffer, NULL);
+    printf("  VERSION = %s\n", buffer);
+    clGetPlatformInfo(info.platform, CL_PLATFORM_NAME, 10240, buffer, NULL);
+    printf("  NAME = %s\n", buffer);
+    clGetPlatformInfo(info.platform, CL_PLATFORM_VENDOR, 10240, buffer, NULL);
+    printf("  VENDOR = %s\n", buffer);
+    clGetPlatformInfo(info.platform, CL_PLATFORM_EXTENSIONS, 10240, buffer, NULL);
+    printf("  EXTENSIONS = %s\n", buffer);
+
     check_error(info);
     info.error=clGetDeviceIDs(info.platform, CL_DEVICE_TYPE_ALL, MAX_DEVICES, devices, &num_devices);
     if(num_devices > MAX_DEVICES) num_devices = MAX_DEVICES;
+    printf("=== %d OpenCL device(s) found on platform:\n", num_devices);
+    int i;
+    for (i=0; i<num_devices; i++)
+    {
+        char buffer[10240];
+        cl_uint buf_uint;
+        cl_ulong buf_ulong;
+        printf("  -- %d --\n", i);
+        clGetDeviceInfo(devices[i], CL_DEVICE_NAME, sizeof(buffer), buffer, NULL);
+        printf("  DEVICE_NAME = %s\n", buffer);
+        clGetDeviceInfo(devices[i], CL_DEVICE_VENDOR, sizeof(buffer), buffer, NULL);
+        printf("  DEVICE_VENDOR = %s\n", buffer);
+        clGetDeviceInfo(devices[i], CL_DEVICE_VERSION, sizeof(buffer), buffer, NULL);
+        printf("  DEVICE_VERSION = %s\n", buffer);
+        clGetDeviceInfo(devices[i], CL_DRIVER_VERSION, sizeof(buffer), buffer, NULL);
+        printf("  DRIVER_VERSION = %s\n", buffer);
+        clGetDeviceInfo(devices[i], CL_DEVICE_MAX_COMPUTE_UNITS, sizeof(buf_uint), &buf_uint, NULL);
+        printf("  DEVICE_MAX_COMPUTE_UNITS = %u\n", (unsigned int)buf_uint);
+        clGetDeviceInfo(devices[i], CL_DEVICE_MAX_CLOCK_FREQUENCY, sizeof(buf_uint), &buf_uint, NULL);
+        printf("  DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint);
+        clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL);
+        printf("  DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong);
+        clGetDeviceInfo(devices[i], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(buf_ulong), &buf_ulong, NULL);
+        printf("  DEVICE_MAX_MEM_ALLOC_SIZE = %llu\n", (unsigned long long)buf_ulong);
+        clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(buf_ulong), &buf_ulong, NULL);
+        printf("  DEVICE_MAX_WORK_GROUP_SIZE = %llu\n", (unsigned long long)buf_ulong);
+        cl_uint items;
+        clGetDeviceInfo( devices[i], CL_DEVICE_MAX_WORK_ITEM_DIMENSIONS, sizeof(cl_uint), 
+                                       &items, NULL);
+        printf("  DEVICE_MAX_WORK_ITEM_DIMENSIONS = %u\n", (unsigned int)items);
+        size_t workitem_size[10];
+        clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_ITEM_SIZES, 10*sizeof(workitem_size), workitem_size, NULL);
+        printf("  DEVICE_MAX_WORK_ITEM_SIZES = %u / %u / %u \n", (unsigned int)workitem_size[0], (unsigned int)workitem_size[1], (unsigned int)workitem_size[2]);
+
+    }
     int index = getpid()%num_devices;
+    index = 0;
     printf("%d rand, %d devices, %d index\n", getpid(), num_devices, index);
-    //info.device = devices[index];
-    info.device = devices[1];
+    info.device = devices[index];
     fprintf(stderr, "Found %d device(s)\n", num_devices);
     check_error(info);
 
@@ -45,6 +102,10 @@
     check_error(info);
     info.queue = clCreateCommandQueue(info.context, info.device, 0, &info.error);
     check_error(info);
+    #ifdef CLBLAS
+    info.error = clblasSetup();
+    #endif
+    check_error(info);
     info.initialized = 1;
     return info;
 }
@@ -52,21 +113,22 @@
 cl_program cl_fprog(char *filename, char *options, cl_info info)
 {
 	size_t srcsize;
-	char src[8192];
-	memset(src, 0, 8192);
+	char src[64*1024];
+	memset(src, 0, 64*1024);
 	FILE *fil=fopen(filename,"r");
+    if(fil == 0) file_error(filename);
 	srcsize=fread(src, sizeof src, 1, fil);
 	fclose(fil);
 	const char *srcptr[]={src};
 	// Submit the source code of the example kernel to OpenCL
 	cl_program prog=clCreateProgramWithSource(info.context,1, srcptr, &srcsize, &info.error);
 	check_error(info);
-	char build_c[4096];
+	char build_c[1024*64];
 	// and compile it (after this we could extract the compiled version)
 	info.error=clBuildProgram(prog, 0, 0, options, 0, 0);
 	if ( info.error != CL_SUCCESS ) {
 		fprintf(stderr, "Error Building Program: %d\n", info.error);
-		clGetProgramBuildInfo( prog, info.device, CL_PROGRAM_BUILD_LOG, 4096, build_c, 0);
+		clGetProgramBuildInfo( prog, info.device, CL_PROGRAM_BUILD_LOG, 1024*64, build_c, 0);
 		fprintf(stderr, "Build Log for %s program:\n%s\n", filename, build_c);
 	}
 	check_error(info);
@@ -76,6 +138,7 @@
 void cl_setup()
 {
 	if(!cl.initialized){
+        printf("initializing\n"); // trace output; reached only while cl.initialized is still 0
 		cl = cl_init();
 	}
 }
@@ -92,21 +155,31 @@
 void cl_read_array(cl_mem mem, float *x, int n)
 {
     cl_setup();
-    clEnqueueReadBuffer(cl.queue, mem, CL_TRUE, 0, sizeof(float)*n,x,0,0,0);
+    cl.error = clEnqueueReadBuffer(cl.queue, mem, CL_TRUE, 0, sizeof(float)*n,x,0,0,0); // blocking read (CL_TRUE) of n floats into x; status stored so check_error below sees this call's result
     check_error(cl);
 }
 
+float cl_checksum(cl_mem mem, int n)
+{ // Reads n floats from mem into a temporary host array and returns sum_array() over them.
+    float *x = calloc(n, sizeof(float)); // host scratch buffer, released before returning
+    if(!x){ fprintf(stderr, "cl_checksum: calloc of %d floats failed\n", n); exit(1); } // never pass a NULL destination to the device read
+    cl_read_array(mem, x, n);
+    float sum = sum_array(x, n);
+    free(x);
+    return sum;
+}
+
 void cl_write_array(cl_mem mem, float *x, int n)
 {
     cl_setup();
-    clEnqueueWriteBuffer(cl.queue, mem, CL_TRUE, 0,sizeof(float)*n,x,0,0,0);
+    cl.error = clEnqueueWriteBuffer(cl.queue, mem, CL_TRUE, 0,sizeof(float)*n,x,0,0,0); // blocking write (CL_TRUE) of n floats from x; status stored so check_error below sees this call's result
     check_error(cl);
 }
 
 void cl_copy_array(cl_mem src, cl_mem dst, int n)
 {
     cl_setup();
-    clEnqueueCopyBuffer(cl.queue, src, dst, 0, 0, sizeof(float)*n,0,0,0);
+    cl.error = clEnqueueCopyBuffer(cl.queue, src, dst, 0, 0, sizeof(float)*n,0,0,0); // copies n floats from offset 0 of src to offset 0 of dst; status stored so check_error below can catch a failed enqueue
     check_error(cl);
 }
 
@@ -115,10 +188,12 @@
     cl_buffer_region r;
     r.origin = offset*sizeof(float);
     r.size = size*sizeof(float);
-    cl_mem sub = clCreateSubBuffer(src, CL_MEM_USE_HOST_PTR, CL_BUFFER_CREATE_TYPE_REGION, &r, 0);
+    cl_mem sub = clCreateSubBuffer(src, CL_MEM_READ_WRITE, CL_BUFFER_CREATE_TYPE_REGION, &r, &cl.error);
+    check_error(cl);
     return sub;
 }
 
+
 cl_mem cl_make_array(float *x, int n)
 {
     cl_setup();
@@ -126,6 +201,17 @@
             CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR,
             sizeof(float)*n, x, &cl.error);
     check_error(cl);
+    activate_array_ongpu(mem, n, LINEAR);
+    return mem;
+}
+
+cl_mem cl_make_int_array(int *x, int n) // creates a buffer of n ints in cl.context from the host array x
+{
+    cl_setup(); // runs cl_init() first if the global cl state is not initialized yet
+    cl_mem mem = clCreateBuffer(cl.context,
+            CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, // COPY_HOST_PTR: buffer contents are copied from x at creation
+            sizeof(int)*n, x, &cl.error);
+    check_error(cl); // aborts if clCreateBuffer stored a failure code in cl.error
     return mem;
 }
 

--
Gitblit v1.10.0