Fixed race condition in server
| | |
| | | |
| | | void activate_array_ongpu(cl_mem x, int n, ACTIVATION a) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_activation_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | |
| | | void gradient_array_ongpu(cl_mem x, int n, ACTIVATION a, cl_mem delta) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_gradient_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | |
| | | void axpy_ongpu_offset(int N, float ALPHA, cl_mem X, int OFFX, int INCX, cl_mem Y, int OFFY, int INCY) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_axpy_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | } |
| | | void copy_ongpu_offset(int N, cl_mem X, int OFFX, int INCX, cl_mem Y, int OFFY, int INCY) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_copy_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | } |
| | | void scal_ongpu(int N, float ALPHA, cl_mem X, int INCX) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_scal_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | } |
| | | } |
| | | |
| | | void train_nist_distributed(char *address) |
| | | { |
| | | srand(time(0)); |
| | | network net = parse_network_cfg("cfg/nist.client"); |
| | | data train = load_categorical_data_csv("data/mnist/mnist_train.csv", 0, 10); |
| | | //data test = load_categorical_data_csv("data/mnist/mnist_test.csv",0,10); |
| | | normalize_data_rows(train); |
| | | //normalize_data_rows(test); |
| | | int count = 0; |
| | | int iters = 50000/net.batch; |
| | | iters = 1000/net.batch + 1; |
| | | while(++count <= 2000){ |
| | | clock_t start = clock(), end; |
| | | float loss = train_network_sgd_gpu(net, train, iters); |
| | | client_update(net, address); |
| | | end = clock(); |
| | | //float test_acc = network_accuracy_gpu(net, test); |
| | | //float test_acc = 0; |
| | | printf("%d: Loss: %f, Time: %lf seconds\n", count, loss, (float)(end-start)/CLOCKS_PER_SEC); |
| | | } |
| | | } |
| | | |
| | | void test_ensemble() |
| | | { |
| | | int i; |
| | |
| | | void run_server() |
| | | { |
| | | srand(time(0)); |
| | | network net = parse_network_cfg("cfg/alexnet.server"); |
| | | network net = parse_network_cfg("cfg/nist.server"); |
| | | server_update(net); |
| | | } |
| | | void test_client() |
| | |
| | | printf("Transfered: %lf seconds\n", sec(clock()-time)); |
| | | } |
| | | |
/*
 * Scan argv for the flag string `arg` and return the integer value of the
 * argument immediately following it.
 * Returns 0 when the flag is absent, is the last argument (no value), or
 * its value does not begin with a number — indistinguishable from an
 * explicit value of 0, matching the original contract.
 */
int find_int_arg(int argc, char* argv[], char *arg)
{
    for(int i = 0; i < argc-1; ++i){
        if(0 == strcmp(argv[i], arg)){
            /* strtol instead of atoi: atoi has undefined behavior on
               out-of-range input and gives no error indication. */
            char *end;
            long v = strtol(argv[i+1], &end, 10);
            if(end == argv[i+1]) return 0;   /* non-numeric value */
            return (int)v;
        }
    }
    return 0;
}
| | | |
| | | int main(int argc, char *argv[]) |
| | | { |
| | | if(argc < 2){ |
| | | fprintf(stderr, "usage: %s <function>\n", argv[0]); |
| | | return 0; |
| | | } |
| | | int index = find_int_arg(argc, argv, "-i"); |
| | | #ifdef GPU |
| | | cl_setup(index); |
| | | #endif |
| | | if(0==strcmp(argv[1], "train")) train_imagenet(); |
| | | else if(0==strcmp(argv[1], "detection")) train_detection_net(); |
| | | else if(0==strcmp(argv[1], "asirra")) train_asirra(); |
| | |
| | | fprintf(stderr, "usage: %s <function>\n", argv[0]); |
| | | return 0; |
| | | } |
| | | else if(0==strcmp(argv[1], "client")) train_imagenet_distributed(argv[2]); |
| | | else if(0==strcmp(argv[1], "client")) train_nist_distributed(argv[2]); |
| | | else if(0==strcmp(argv[1], "visualize")) test_visualize(argv[2]); |
| | | else if(0==strcmp(argv[1], "valid")) validate_imagenet(argv[2]); |
| | | fprintf(stderr, "Success!\n"); |
| | |
| | | int channels, int height, int width, |
| | | int ksize, int stride, int pad, cl_mem data_im) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_col2im_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | { |
| | | int size = convolutional_out_height(layer) * convolutional_out_width(layer); |
| | | |
| | | cl_setup(); |
| | | cl_kernel kernel = get_convolutional_learn_bias_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | int out_w = convolutional_out_width(layer); |
| | | int size = out_h*out_w; |
| | | |
| | | cl_setup(); |
| | | cl_kernel kernel = get_convolutional_bias_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | |
| | | void mask_ongpu(int n, cl_mem x, cl_mem mask, int mod) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_mask_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | cl_mem C_gpu, int c_off, int ldc) |
| | | { |
| | | #ifdef CLBLAS |
| | | cl_setup(); |
| | | cl_command_queue queue = cl.queue; |
| | | cl_event event; |
| | | cl.error = clblasSgemm(clblasRowMajor, TA?clblasTrans:clblasNoTrans, TB?clblasTrans:clblasNoTrans,M, N, K,ALPHA, A_gpu, a_off, lda,B_gpu, b_off, ldb,BETA, C_gpu, c_off, ldc,1, &queue, 0, NULL, &event); |
| | | check_error(cl); |
| | | #else |
| | | //printf("gpu: %d %d %d %d %d\n",TA, TB, M, N, K); |
| | | cl_setup(); |
| | | cl_kernel gemm_kernel = get_gemm_kernel(); |
| | | if(!TA && !TB) gemm_kernel = get_gemm_nn_kernel(); |
| | | if(!TA && TB) gemm_kernel = get_gemm_nt_kernel(); |
| | |
| | | float BETA, |
| | | float *C, int ldc) |
| | | { |
| | | cl_setup(); |
| | | cl_context context = cl.context; |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | int channels, int height, int width, |
| | | int ksize, int stride, int pad, cl_mem data_col) |
| | | { |
| | | cl_setup(); |
| | | |
| | | int height_col = (height - ksize) / stride + 1; |
| | | int width_col = (width - ksize) / stride + 1; |
| | |
| | | int channels, int height, int width, |
| | | int ksize, int stride, int pad, float *data_col) |
| | | { |
| | | cl_setup(); |
| | | cl_context context = cl.context; |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | int h = (layer.h-1)/layer.stride + 1; |
| | | int w = (layer.w-1)/layer.stride + 1; |
| | | int c = layer.c; |
| | | cl_setup(); |
| | | cl_kernel kernel = get_forward_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | |
| | | void backward_maxpool_layer_gpu(maxpool_layer layer, cl_mem delta) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_backward_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | |
| | | |
| | | #define MAX_DEVICES 10 |
| | | |
| | | cl_info cl_init() |
| | | cl_info cl_init(int index) |
| | | { |
| | | cl_info info; |
| | | info.initialized = 0; |
| | |
| | | printf(" DEVICE_MAX_WORK_ITEM_SIZES = %u / %u / %u \n", (unsigned int)workitem_size[0], (unsigned int)workitem_size[1], (unsigned int)workitem_size[2]); |
| | | |
| | | } |
| | | int index = getpid()%num_devices; |
| | | index = 1; |
| | | index = index%num_devices; |
| | | printf("%d rand, %d devices, %d index\n", getpid(), num_devices, index); |
| | | info.device = devices[index]; |
| | | fprintf(stderr, "Found %d device(s)\n", num_devices); |
| | |
| | | return prog; |
| | | } |
| | | |
| | | void cl_setup() |
| | | void cl_setup(int index) |
| | | { |
| | | if(!cl.initialized){ |
| | | printf("initializing\n"); |
| | | cl = cl_init(); |
| | | cl = cl_init(index); |
| | | } |
| | | } |
| | | |
| | | cl_kernel get_kernel(char *filename, char *kernelname, char *options) |
| | | { |
| | | cl_setup(); |
| | | cl_program prog = cl_fprog(filename, options, cl); |
| | | cl_kernel kernel=clCreateKernel(prog, kernelname, &cl.error); |
| | | check_error(cl); |
| | |
| | | |
| | | void cl_read_array(cl_mem mem, float *x, int n) |
| | | { |
| | | cl_setup(); |
| | | cl.error = clEnqueueReadBuffer(cl.queue, mem, CL_TRUE, 0, sizeof(float)*n,x,0,0,0); |
| | | check_error(cl); |
| | | } |
| | |
| | | |
| | | void cl_write_array(cl_mem mem, float *x, int n) |
| | | { |
| | | cl_setup(); |
| | | cl.error = clEnqueueWriteBuffer(cl.queue, mem, CL_TRUE, 0,sizeof(float)*n,x,0,0,0); |
| | | check_error(cl); |
| | | } |
| | | |
| | | void cl_copy_array(cl_mem src, cl_mem dst, int n) |
| | | { |
| | | cl_setup(); |
| | | cl.error = clEnqueueCopyBuffer(cl.queue, src, dst, 0, 0, sizeof(float)*n,0,0,0); |
| | | check_error(cl); |
| | | } |
| | |
| | | |
| | | cl_mem cl_make_array(float *x, int n) |
| | | { |
| | | cl_setup(); |
| | | cl_mem mem = clCreateBuffer(cl.context, |
| | | CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, |
| | | sizeof(float)*n, x, &cl.error); |
| | |
| | | |
| | | cl_mem cl_make_int_array(int *x, int n) |
| | | { |
| | | cl_setup(); |
| | | cl_mem mem = clCreateBuffer(cl.context, |
| | | CL_MEM_READ_WRITE|CL_MEM_COPY_HOST_PTR, |
| | | sizeof(int)*n, x, &cl.error); |
| | |
| | | |
| | | extern cl_info cl; |
| | | |
| | | void cl_setup(); |
| | | void cl_setup(int index); |
| | | void check_error(cl_info info); |
| | | cl_kernel get_kernel(char *filename, char *kernelname, char *options); |
| | | void cl_read_array(cl_mem mem, float *x, int n); |
| | |
| | | |
/*
 * Read exactly `bytes` bytes from `fd` into `buffer`, looping over short
 * reads.  Calls error() (which does not return) on failure or premature EOF.
 */
void read_all(int fd, char *buffer, size_t bytes)
{
    size_t n = 0;
    while(n < bytes){
        /* read(2) returns ssize_t; storing it in int could truncate. */
        ssize_t next = read(fd, buffer + n, bytes - n);
        /* `<= 0` covers both errors (-1) and EOF (0); treating EOF as an
           error prevents an infinite loop when the peer closes early.
           (The diff residue carried both the old `< 0` and new `<= 0`
           checks; only the `<= 0` form is kept.) */
        if(next <= 0) error("read failed");
        n += (size_t)next;
    }
}
| | | |
/*
 * Write exactly `bytes` bytes from `buffer` to `fd`, looping over short
 * writes.  Calls error() (which does not return) on failure.
 */
void write_all(int fd, char *buffer, size_t bytes)
{
    size_t n = 0;
    while(n < bytes){
        /* write(2) returns ssize_t; storing it in int could truncate. */
        ssize_t next = write(fd, buffer + n, bytes - n);
        /* `<= 0` also treats a zero-byte write as failure, avoiding a
           potential busy loop.  (The diff residue carried both the old
           `< 0` and new `<= 0` checks; only the `<= 0` form is kept.) */
        if(next <= 0) error("write failed");
        n += (size_t)next;
    }
}
| | |
| | | |
| | | void handle_connection(void *pointer) |
| | | { |
| | | printf("New Connection\n"); |
| | | connection_info info = *(connection_info *) pointer; |
| | | free(pointer); |
| | | printf("New Connection\n"); |
| | | int fd = info.fd; |
| | | network net = info.net; |
| | | int i; |
| | |
| | | } |
| | | printf("Received updates\n"); |
| | | close(fd); |
| | | ++*(info.counter); |
| | | if(*(info.counter)%10==0) save_network(net, "/home/pjreddie/imagenet_backup/alexnet.part"); |
| | | } |
| | | |
| | | void server_update(network net) |
| | |
| | | listen(fd, 10); |
| | | struct sockaddr_in client; /* remote address */ |
| | | socklen_t client_size = sizeof(client); /* length of addresses */ |
| | | connection_info info; |
| | | info.net = net; |
| | | info.counter = &counter; |
| | | while(1){ |
| | | connection_info *info = calloc(1, sizeof(connection_info)); |
| | | info->net = net; |
| | | info->counter = &counter; |
| | | pthread_t worker; |
| | | int connection = accept(fd, (struct sockaddr *) &client, &client_size); |
| | | info.fd = connection; |
| | | pthread_create(&worker, NULL, (void *) &handle_connection, &info); |
| | | info->fd = connection; |
| | | pthread_create(&worker, NULL, (void *) &handle_connection, info); |
| | | ++counter; |
| | | if(counter%1000==0) save_network(net, "cfg/nist.part"); |
| | | } |
| | | } |
| | | |
| | |
| | | |
| | | /* send a message to the server */ |
| | | int i; |
| | | //printf("Sending\n"); |
| | | for(i = 0; i < net.n; ++i){ |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *) net.layers[i]; |
| | |
| | | memset(layer.weight_updates, 0, layer.inputs*layer.outputs*sizeof(float)); |
| | | } |
| | | } |
| | | //printf("Sent\n"); |
| | | |
| | | for(i = 0; i < net.n; ++i){ |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | |
| | | push_connected_layer(layer); |
| | | } |
| | | } |
| | | //printf("Updated\n"); |
| | | close(fd); |
| | | } |
| | |
| | | |
| | | void forward_softmax_layer_gpu(const softmax_layer layer, cl_mem input) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_softmax_forward_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |