| | |
| | | |
| | | UNAME = $(shell uname) |
| | | OPTS=-Ofast -flto |
| | | #OPTS=-O3 |
| | | ifeq ($(UNAME), Darwin) |
| | | COMMON+= -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include |
| | | ifeq ($(GPU), 1) |
| | |
| | | |
| | | size_t gsize = n; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, &gsize, 0, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, &gsize, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | |
| | |
| | | |
| | | size_t gsize = n; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, &gsize, 0, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, &gsize, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | #endif |
| | |
| | | |
| | | const size_t global_size[] = {N}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | |
| | | } |
| | |
| | | |
| | | const size_t global_size[] = {N}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | void scal_ongpu(int N, float ALPHA, cl_mem X, int INCX) |
| | |
| | | |
| | | const size_t global_size[] = {N}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | #endif |
| | |
| | | |
/*
 * Parser smoke test: loads a network description with parse_network_cfg and
 * writes it back out with save_network.
 *
 * NOTE(review): this span appears to interleave both sides of a diff. `net` is
 * declared twice (once from cfg/test_parser.cfg, once from
 * cfg/trained_imagenet.cfg), which cannot compile as written. Confirm which
 * set of lines is the live one before relying on this function.
 */
| | | void test_parser() |
| | | { |
| | | network net = parse_network_cfg("cfg/test_parser.cfg"); |
| | | save_network(net, "cfg/test_parser_1.cfg"); |
| | | network net2 = parse_network_cfg("cfg/test_parser_1.cfg"); |
| | | save_network(net2, "cfg/test_parser_2.cfg"); |
| | | network net = parse_network_cfg("cfg/trained_imagenet.cfg"); |
| | | save_network(net, "cfg/trained_imagenet_smaller.cfg"); |
| | | } |
| | | |
| | | void test_data() |
| | |
| | | normalize_data_rows(train); |
| | | printf("Loaded: %lf seconds\n", sec(clock()-time)); |
| | | time=clock(); |
| | | float loss = train_network_data_gpu(net, train, imgs); |
| | | //float loss = train_network_data(net, train, imgs); |
| | | float loss = 0; |
| | | printf("%d: %f, Time: %lf seconds\n", i*net.batch*imgs, loss, sec(clock()-time)); |
| | | free_data(train); |
| | | if(i%10==0){ |
| | |
| | | void train_imagenet() |
| | | { |
| | | float avg_loss = 1; |
| | | network net = parse_network_cfg("/home/pjreddie/imagenet_backup/imagenet_nin_2680.cfg"); |
| | | network net = parse_network_cfg("/home/pjreddie/imagenet_backup/imagenet_2280.cfg"); |
| | | //network net = parse_network_cfg("cfg/imagenet2.cfg"); |
| | | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); |
| | | int imgs = 1000/net.batch+1; |
| | | srand(time(0)); |
| | |
| | | free_data(train); |
| | | if(i%10==0){ |
| | | char buff[256]; |
| | | sprintf(buff, "/home/pjreddie/imagenet_backup/imagenet_nin_%d.cfg", i); |
| | | sprintf(buff, "/home/pjreddie/imagenet_backup/imagenet_%d.cfg", i); |
| | | save_network(net, buff); |
| | | } |
| | | } |
| | |
| | | char filename[256]; |
| | | int indexes[10]; |
| | | while(1){ |
| | | gets(filename); |
| | | fgets(filename, 256, stdin); |
| | | image im = load_image_color(filename, 256, 256); |
| | | z_normalize_image(im); |
| | | printf("%d %d %d\n", im.h, im.w, im.c); |
| | |
| | | data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10); |
| | | data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10); |
| | | translate_data_rows(train, -144); |
| | | //scale_data_rows(train, 1./128); |
| | | translate_data_rows(test, -144); |
| | | //scale_data_rows(test, 1./128); |
| | | //randomize_data(train); |
| | | int count = 0; |
| | | //clock_t start = clock(), end; |
| | | int iters = 10000/net.batch; |
| | | int iters = 50000/net.batch; |
| | | while(++count <= 2000){ |
| | | clock_t start = clock(), end; |
| | | float loss = train_network_sgd(net, train, iters); |
| | | end = clock(); |
| | | float test_acc = network_accuracy(net, test); |
| | | //float test_acc = 0; |
| | | printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds, LR: %f, Momentum: %f, Decay: %f\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC, net.learning_rate, net.momentum, net.decay); |
| | | /*printf("%f %f %f %f %f\n", mean_array(get_network_output_layer(net,0), 100), |
| | | mean_array(get_network_output_layer(net,1), 100), |
| | | mean_array(get_network_output_layer(net,2), 100), |
| | | mean_array(get_network_output_layer(net,3), 100), |
| | | mean_array(get_network_output_layer(net,4), 100)); |
| | | */ |
| | | //save_network(net, "cfg/nist_final2.cfg"); |
| | | |
| | | //printf("%5d Training Loss: %lf, Params: %f %f %f, ",count*1000, loss, lr, momentum, decay); |
| | | //end = clock(); |
| | | //printf("Time: %lf seconds\n", (float)(end-start)/CLOCKS_PER_SEC); |
| | | //start=end; |
| | | //lr *= .5; |
| | | printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds\n", count, loss, test_acc,(float)(end-start)/CLOCKS_PER_SEC); |
| | | } |
| | | //save_network(net, "cfg/nist_basic_trained.cfg"); |
| | | } |
| | | |
| | | void test_ensemble() |
| | |
| | | } |
| | | if(0==strcmp(argv[1], "train")) train_imagenet(); |
| | | else if(0==strcmp(argv[1], "asirra")) train_asirra(); |
| | | else if(0==strcmp(argv[1], "nist")) train_nist(); |
| | | else if(0==strcmp(argv[1], "train_small")) train_imagenet_small(); |
| | | else if(0==strcmp(argv[1], "test_correct")) test_gpu_net(); |
| | | else if(0==strcmp(argv[1], "test")) test_imagenet(); |
| | |
| | | |
| | | size_t global_size = channels*height*width*batch; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, |
| | | &global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | |
| | | |
| | | connected_layer *make_connected_layer(int batch, int inputs, int outputs, ACTIVATION activation, float learning_rate, float momentum, float decay) |
| | | { |
| | | fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs); |
| | | int i; |
| | | connected_layer *layer = calloc(1, sizeof(connected_layer)); |
| | | |
| | |
| | | layer->delta_cl = cl_make_array(layer->delta, outputs*batch); |
| | | #endif |
| | | layer->activation = activation; |
| | | fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs); |
| | | return layer; |
| | | } |
| | | |
| | |
| | | |
| | | const size_t global_size[] = {layer.n}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | |
| | |
| | | |
| | | const size_t global_size[] = {layer.n*size, layer.batch}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 2, 0, global_size, 0, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 2, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | |
| | |
| | | |
| | | gemm_ongpu_offset(0,1,m,n,k,1,a,i*m*k,k,b,i*k*n,k,1,c,0,n); |
| | | } |
| | | //cl_read_array(layer.delta_cl, layer.delta, m*k*layer.batch); |
| | | |
| | | if(delta_cl){ |
| | | m = layer.size*layer.size*layer.c; |
| | |
| | | #include "cost_layer.h" |
| | | #include "utils.h" |
| | | #include "mini_blas.h" |
| | | #include <math.h> |
| | | #include <stdlib.h> |
| | |
| | | { |
| | | if (!truth) return; |
| | | |
| | | |
| | | copy_ongpu(layer.batch*layer.inputs, truth, 1, layer.delta_cl, 1); |
| | | axpy_ongpu(layer.batch*layer.inputs, -1, input, 1, layer.delta_cl, 1); |
| | | |
| | | cl_read_array(layer.delta_cl, layer.delta, layer.batch*layer.inputs); |
| | | *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1); |
| | | //printf("%f\n", *layer.output); |
| | | } |
| | | |
| | | void backward_cost_layer_gpu(const cost_layer layer, cl_mem input, cl_mem delta) |
| | |
| | | return lines; |
| | | } |
| | | |
/*
 * Intended to derive a detection label path from an image path by swapping
 * "imgs" for "det" and ".JPEG" for ".txt".
 *
 * NOTE(review): both find_replace results are discarded and `truth` is never
 * written in the visible body, so this function currently has no observable
 * effect on its arguments. Presumably a work-in-progress stub; confirm.
 */
| | | void fill_truth_det(char *path, float *truth) |
| | | { |
| | | find_replace(path, "imgs", "det"); |
| | | find_replace(path, ".JPEG", ".txt"); |
| | | } |
| | | |
| | | void fill_truth(char *path, char **labels, int k, float *truth) |
| | | { |
| | | int i; |
| | |
| | | |
| | | data load_data_image_pathfile_part(char *filename, int part, int total, char **labels, int k, int h, int w) |
| | | { |
| | | clock_t time = clock(); |
| | | list *plist = get_paths(filename); |
| | | char **paths = (char **)list_to_array(plist); |
| | | int start = part*plist->size/total; |
| | |
| | | #include "dropout_layer.h" |
| | | #include "stdlib.h" |
| | | #include "stdio.h" |
| | | #include "utils.h" |
| | | #include <stdlib.h> |
| | | #include <stdio.h> |
| | | |
| | | dropout_layer *make_dropout_layer(int batch, int inputs, float probability) |
| | | { |
| | |
| | | layer->probability = probability; |
| | | layer->inputs = inputs; |
| | | layer->batch = batch; |
| | | #ifdef GPU |
| | | layer->rand = calloc(inputs*batch, sizeof(float)); |
| | | layer->rand_cl = cl_make_array(layer->rand, inputs*batch); |
| | | #endif |
| | | return layer; |
| | | } |
| | | |
| | |
| | | { |
| | | int i; |
| | | for(i = 0; i < layer.batch * layer.inputs; ++i){ |
| | | if((float)rand()/RAND_MAX < layer.probability) input[i] = 0; |
| | | if(rand_uniform() < layer.probability) input[i] = 0; |
| | | else input[i] /= (1-layer.probability); |
| | | } |
| | | } |
| | |
| | | { |
| | | // Don't do shit LULZ |
| | | } |
| | | |
| | | #ifdef GPU |
| | | cl_kernel get_dropout_kernel() |
| | | { |
| | | static int init = 0; |
| | | static cl_kernel kernel; |
| | | if(!init){ |
| | | kernel = get_kernel("src/dropout_layer.cl", "forward", 0); |
| | | init = 1; |
| | | } |
| | | return kernel; |
| | | } |
| | | |
| | | void forward_dropout_layer_gpu(dropout_layer layer, cl_mem input) |
| | | { |
| | | int j; |
| | | int size = layer.inputs*layer.batch; |
| | | for(j = 0; j < size; ++j) layer.rand[j] = rand_uniform(); |
| | | cl_write_array(layer.rand_cl, layer.rand, layer.inputs*layer.batch); |
| | | |
| | | cl_kernel kernel = get_dropout_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | | cl_uint i = 0; |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(input), (void*) &input); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(layer.rand_cl), (void*) &layer.rand_cl); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(layer.probability), (void*) &layer.probability); |
| | | check_error(cl); |
| | | |
| | | const size_t global_size[] = {size}; |
| | | |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | #endif |
| | |
/* Public interface of the dropout layer (CPU path, plus an OpenCL path when GPU is defined). */
| | | #ifndef DROPOUT_LAYER_H |
| | | #define DROPOUT_LAYER_H |
| | | #include "opencl.h" |
| | | |
| | | typedef struct{ |
| | | int batch; /* number of examples per batch */ |
| | | int inputs; /* number of input values per example */ |
| | | float probability; /* chance that an input is zeroed; survivors are divided by (1-probability) */ |
| | | #ifdef GPU |
| | | float *rand; /* host buffer of inputs*batch random draws */ |
| | | cl_mem rand_cl; /* device buffer created from `rand` with cl_make_array */ |
| | | #endif |
| | | } dropout_layer; |
| | | |
| | | dropout_layer *make_dropout_layer(int batch, int inputs, float probability); |
| | | |
| | | void forward_dropout_layer(dropout_layer layer, float *input); |
| | | void backward_dropout_layer(dropout_layer layer, float *input, float *delta); |
| | | #ifdef GPU |
| | | void forward_dropout_layer_gpu(dropout_layer layer, cl_mem input); |
| | | |
| | | #endif |
| | | #endif |
| | |
| | | input[i] *= 2.*((float)rand()/RAND_MAX); |
| | | } |
| | | } |
| | | |
| | | void backward_freeweight_layer(freeweight_layer layer, float *input, float *delta) |
| | | { |
| | | // Don't do shit LULZ |
| | |
| | | const size_t global_size[] = {ceil((float)N/BLOCK)*BLOCK, ceil((float)M/BLOCK)*BLOCK}; |
| | | const size_t local_size[] = {BLOCK, BLOCK}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, gemm_kernel, 2, 0, global_size, local_size, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, gemm_kernel, 2, 0, global_size, local_size, 0, 0, 0); |
| | | check_error(cl); |
| | | #endif |
| | | } |
| | |
| | | test_gpu_accuracy(0,1,1000,10,100); |
| | | test_gpu_accuracy(1,1,1000,10,100); |
| | | */ |
| | | time_ongpu(0,0,512,256,1152); |
| | | time_ongpu(0,0,128,1200,4096); |
| | | time_ongpu(0,0,128,1200,4096); |
| | | time_ongpu(0,0,128,1200,4096); |
| | |
| | | time_ongpu(1,0,4096,1200,128); |
| | | time_ongpu(1,0,1200,128,4096); |
| | | |
| | | test_gpu_accuracy(0,0,512,256,1152); |
| | | test_gpu_accuracy(0,0,131,4093,1199); |
| | | test_gpu_accuracy(0,1,131,4093,1199); |
| | | test_gpu_accuracy(1,0,131,4093,1199); |
| | |
| | | |
| | | size_t global_size = batch*channels_col*height_col*width_col; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, |
| | | &global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | |
| | | |
| | | const size_t global_size[] = {h*w*c*layer.batch}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | |
| | |
| | | |
| | | const size_t global_size[] = {layer.h*layer.w*layer.c*layer.batch}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | |
| | |
| | | |
| | | void test_blas() |
| | | { |
| | | |
| | | time_random_matrix(0,0,100,100,100); |
| | | time_random_matrix(1,0,100,100,100); |
| | | time_random_matrix(0,1,100,100,100); |
| | |
| | | } |
| | | } |
| | | |
/*
 * Stores in `index` the positions of the largest values of the network's
 * output vector (obtained from get_network_output / get_network_output_size).
 *
 * NOTE(review): this span appears to interleave both sides of a diff. There
 * are two signatures (count named `n` vs. `k`), two locals holding the output
 * size (`k` vs. `size`), and an inline selection loop alongside the final
 * top_k(out, size, k, index) call that performs the same selection. Confirm
 * which lines are live; as shown the span does not compile.
 */
| | | void top_predictions(network net, int n, int *index) |
| | | void top_predictions(network net, int k, int *index) |
| | | { |
| | | int i,j; |
| | | int k = get_network_output_size(net); |
| | | int size = get_network_output_size(net); |
| | | float *out = get_network_output(net); |
| | | float thresh = FLT_MAX; |
| | | for(i = 0; i < n; ++i){ |
| | | float max = -FLT_MAX; |
| | | int max_i = -1; |
| | | for(j = 0; j < k; ++j){ |
| | | float val = out[j]; |
| | | if(val > max && val < thresh){ |
| | | max = val; |
| | | max_i = j; |
| | | } |
| | | } |
| | | index[i] = max_i; |
| | | thresh = max; |
| | | } |
| | | top_k(out, size, k, index); |
| | | } |
| | | |
| | | |
| | |
| | | { |
| | | //printf("start\n"); |
| | | int i; |
| | | // printf("Truth: %f\n", cl_checksum(truth, 1000*net.batch)); |
| | | for(i = 0; i < net.n; ++i){ |
| | | //printf("Truth %i: %f\n", i, cl_checksum(truth, 1000*net.batch)); |
| | | //clock_t time = clock(); |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | |
| | | forward_softmax_layer_gpu(layer, input); |
| | | input = layer.output_cl; |
| | | } |
| | | else if(net.types[i] == DROPOUT){ |
| | | if(!train) continue; |
| | | dropout_layer layer = *(dropout_layer *)net.layers[i]; |
| | | forward_dropout_layer_gpu(layer, input); |
| | | } |
| | | //printf("%d %f\n", i, sec(clock()-time)); |
| | | /* |
| | | else if(net.types[i] == CROP){ |
| | |
| | | else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | return layer.output_cl; |
| | | } else if(net.types[i] == DROPOUT){ |
| | | return get_network_output_cl_layer(net, i-1); |
| | | } |
| | | return 0; |
| | | } |
| | |
| | | else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | return layer.delta_cl; |
| | | } else if(net.types[i] == DROPOUT){ |
| | | return get_network_delta_cl_layer(net, i-1); |
| | | } |
| | | return 0; |
| | | } |
| | |
| | | } |
| | | //printf("trans %f\n", sec(clock()-time)); |
| | | //time = clock(); |
| | | |
| | | forward_network_gpu(net, *net.input_cl, *net.truth_cl, 1); |
| | | |
| | | //printf("forw %f\n", sec(clock()-time)); |
| | | //time = clock(); |
| | | backward_network_gpu(net, *net.input_cl); |
| | | //printf("back %f\n", sec(clock()-time)); |
| | | //time = clock(); |
| | | |
| | | update_network_gpu(net); |
| | | float error = get_network_cost(net); |
| | | |
| | | //printf("updt %f\n", sec(clock()-time)); |
| | | //time = clock(); |
| | | return error; |
| | |
| | | |
| | | #include "opencl.h" |
| | | #include "utils.h" |
| | | #include "activations.h" |
| | | |
| | | cl_info cl = {0}; |
| | | |
/*
 * Terminates the program when `info.error` is not CL_SUCCESS, after printing
 * the numeric error code to stdout. Does nothing on success.
 *
 * NOTE(review): this span appears to interleave both sides of a diff: an
 * active clFinish(cl.queue) next to a commented-out copy, and abort()
 * immediately followed by exit(1) (unreachable after abort). Confirm which
 * variant is live.
 */
| | | void check_error(cl_info info) |
| | | { |
| | | clFinish(cl.queue); |
| | | // clFinish(cl.queue); |
| | | if (info.error != CL_SUCCESS) { |
| | | printf("\n Error number %d", info.error); |
| | | abort(); |
| | | exit(1); |
| | | } |
| | | } |
| | |
| | | printf(" DEVICE_MAX_CLOCK_FREQUENCY = %u\n", (unsigned int)buf_uint); |
| | | clGetDeviceInfo(devices[i], CL_DEVICE_GLOBAL_MEM_SIZE, sizeof(buf_ulong), &buf_ulong, NULL); |
| | | printf(" DEVICE_GLOBAL_MEM_SIZE = %llu\n", (unsigned long long)buf_ulong); |
| | | clGetDeviceInfo(devices[i], CL_DEVICE_MAX_MEM_ALLOC_SIZE, sizeof(buf_ulong), &buf_ulong, NULL); |
| | | printf(" DEVICE_MAX_MEM_ALLOC_SIZE = %llu\n", (unsigned long long)buf_ulong); |
| | | clGetDeviceInfo(devices[i], CL_DEVICE_MAX_WORK_GROUP_SIZE, sizeof(buf_ulong), &buf_ulong, NULL); |
| | | printf(" DEVICE_MAX_WORK_GROUP_SIZE = %llu\n", (unsigned long long)buf_ulong); |
| | | cl_uint items; |
| | |
/*
 * Copies `n` floats from the device buffer `mem` into host memory `x` using a
 * blocking read (CL_TRUE) starting at offset 0, then validates the result
 * with check_error.
 *
 * NOTE(review): the read is issued twice here, once unchecked and once with
 * its status stored in cl.error. This looks like the old and new side of a
 * diff shown together; only the checked call should remain.
 */
| | | void cl_read_array(cl_mem mem, float *x, int n) |
| | | { |
| | | cl_setup(); |
| | | clEnqueueReadBuffer(cl.queue, mem, CL_TRUE, 0, sizeof(float)*n,x,0,0,0); |
| | | cl.error = clEnqueueReadBuffer(cl.queue, mem, CL_TRUE, 0, sizeof(float)*n,x,0,0,0); |
| | | check_error(cl); |
| | | } |
| | | |
| | | float cl_checksum(cl_mem mem, int n) |
| | | { |
| | | |
| | | float *x = calloc(n, sizeof(float)); |
| | | cl_read_array(mem, x, n); |
| | | float sum = sum_array(x, n); |
| | | free(x); |
| | | return sum; |
| | | } |
| | | |
/*
 * Copies `n` floats from host memory `x` into the device buffer `mem` using a
 * blocking write (CL_TRUE) starting at offset 0, then validates the result
 * with check_error.
 *
 * NOTE(review): the write is issued twice here, once unchecked and once with
 * its status stored in cl.error. This looks like the old and new side of a
 * diff shown together; only the checked call should remain.
 */
| | | void cl_write_array(cl_mem mem, float *x, int n) |
| | | { |
| | | cl_setup(); |
| | | clEnqueueWriteBuffer(cl.queue, mem, CL_TRUE, 0,sizeof(float)*n,x,0,0,0); |
| | | cl.error = clEnqueueWriteBuffer(cl.queue, mem, CL_TRUE, 0,sizeof(float)*n,x,0,0,0); |
| | | check_error(cl); |
| | | } |
| | | |
/*
 * Device-to-device copy of `n` floats from `src` to `dst`, both starting at
 * offset 0, followed by check_error on the enqueue status.
 *
 * NOTE(review): the copy is enqueued twice here, once unchecked and once with
 * its status stored in cl.error. This looks like the old and new side of a
 * diff shown together; only the checked call should remain.
 */
| | | void cl_copy_array(cl_mem src, cl_mem dst, int n) |
| | | { |
| | | cl_setup(); |
| | | clEnqueueCopyBuffer(cl.queue, src, dst, 0, 0, sizeof(float)*n,0,0,0); |
| | | cl.error = clEnqueueCopyBuffer(cl.queue, src, dst, 0, 0, sizeof(float)*n,0,0,0); |
| | | check_error(cl); |
| | | } |
| | | |
| | |
| | | return sub; |
| | | } |
| | | |
| | | |
| | | cl_mem cl_make_array(float *x, int n) |
| | | { |
| | | cl_setup(); |
| | |
| | | CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, |
| | | sizeof(float)*n, x, &cl.error); |
| | | check_error(cl); |
| | | activate_array_ongpu(mem, n, LINEAR); |
| | | return mem; |
| | | } |
| | | |
| | |
| | | cl_mem cl_make_int_array(int *x, int n); |
| | | void cl_copy_array(cl_mem src, cl_mem dst, int n); |
| | | cl_mem cl_sub_array(cl_mem src, int offset, int size); |
| | | float cl_checksum(cl_mem mem, int n); |
| | | #endif |
| | | #endif |
| | |
| | | |
| | | const size_t global_size[] = {layer.batch}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | cl.error = clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | |
| | | /* |
| | |
| | | #include "utils.h" |
| | | #include <stdio.h> |
| | | #include <stdlib.h> |
| | | #include <string.h> |
| | | #include <math.h> |
| | | #include <float.h> |
| | | |
| | | #include "utils.h" |
| | | |
/*
 * Replaces the first occurrence of `orig` inside `str` with `rep`.
 *
 * Returns `str` itself (unchanged) when `orig` does not occur. Otherwise
 * returns a pointer to an internal static buffer holding the new string; that
 * buffer is overwritten by the next call, so the function is not reentrant
 * and the caller must copy the result if it needs to keep it.
 *
 * The result is truncated to fit the 4096-byte buffer (previously a long
 * input overflowed it). The string is assembled in a scratch array first, so
 * `str` may safely be the pointer returned by an earlier call (chained
 * replacements) without source and destination overlapping.
 */
char *find_replace(char *str, char *orig, char *rep)
{
    static char buffer[4096];
    char scratch[sizeof(buffer)];
    char *p = strstr(str, orig);
    size_t prefix;

    if(!p) return str;      /* 'orig' is not in 'str' at all */

    /* Text before the match, clamped so the terminator always fits. */
    prefix = (size_t)(p - str);
    if(prefix > sizeof(scratch) - 1) prefix = sizeof(scratch) - 1;
    memcpy(scratch, str, prefix);

    /* Replacement followed by the text after the match; snprintf truncates and NUL-terminates. */
    snprintf(scratch + prefix, sizeof(scratch) - prefix, "%s%s", rep, p + strlen(orig));

    memcpy(buffer, scratch, strlen(scratch) + 1);
    return buffer;
}
| | | |
/* Converts a tick count, as returned by clock(), into seconds. */
float sec(clock_t clocks)
{
    float ticks = (float)clocks;
    return ticks/CLOCKS_PER_SEC;
}
| | | |
/*
 * Writes into index[0..k-1] the positions of the k largest values of
 * a[0..n-1], in descending order of value.
 *
 * Equal values are all reported, in ascending position order. The previous
 * version required each pick to be strictly smaller than the one before, so a
 * tied value was skipped and later slots could end up as -1 even though
 * unused elements remained. Values equal to FLT_MAX or +infinity can now be
 * selected as well.
 *
 * Slots that cannot be filled (k > n, or only values <= -FLT_MAX remain) are
 * set to -1. The caller provides `index` with room for k ints. Runs k passes
 * over the array and allocates nothing.
 */
void top_k(float *a, int n, int k, int *index)
{
    int i,j;
    float thresh = FLT_MAX;   /* value chosen in the previous pass */
    int last = -1;            /* position chosen in the previous pass */
    for(i = 0; i < k; ++i){
        float max = -FLT_MAX;
        int max_i = -1;
        for(j = 0; j < n; ++j){
            float val = a[j];
            /* Candidates come strictly after the previous pick in
             * (value descending, position ascending) order. */
            int eligible = (i == 0) || val < thresh || (val == thresh && j > last);
            if(eligible && val > max){
                max = val;
                max_i = j;
            }
        }
        index[i] = max_i;
        thresh = max;
        last = max_i;
    }
}
| | | |
| | | void error(char *s) |
| | | { |
| | | fprintf(stderr, "Error: %s\n", s); |
| | |
| | | #include <time.h> |
| | | #include "list.h" |
| | | |
| | | char *find_replace(char *str, char *orig, char *rep); |
| | | void error(char *s); |
| | | void malloc_error(); |
| | | void file_error(char *s); |
| | | void strip(char *s); |
| | | void strip_char(char *s, char bad); |
| | | void top_k(float *a, int n, int k, int *index); |
| | | list *split_str(char *s, char delim); |
| | | char *fgetl(FILE *fp); |
| | | list *parse_csv_line(char *line); |