Convolutional working on GPU
17 files modified
4 files added
| | |
| | | CC=gcc |
| | | GPU=0 |
| | | GPU=1 |
| | | COMMON=-Wall -Wfatal-errors `pkg-config --cflags opencv` -I/usr/local/cuda/include/ |
| | | ifeq ($(GPU), 1) |
| | | COMMON+=-DGPU |
| | |
| | | EXEC=cnn |
| | | OBJDIR=./obj/ |
| | | |
| | | OBJ=network.o image.o cnn.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o gemm.o normalization_layer.o opencl.o im2col.o col2im.o axpy.o dropout_layer.o crop_layer.o |
| | | OBJ=network.o image.o cnn.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o gemm.o normalization_layer.o opencl.o im2col.o col2im.o axpy.o dropout_layer.o crop_layer.o freeweight_layer.o cost_layer.o |
| | | OBJS = $(addprefix $(OBJDIR), $(OBJ)) |
| | | |
| | | all: $(EXEC) |
| | |
| | | float relu_activate(float x){return x*(x>0);} |
| | | float ramp_activate(float x){return x*(x>0)+.1*x;} |
| | | float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);} |
| | | //float tanh_activate(float x){return x - (x*x*x)/3;} |
| | | |
| | | float linear_gradient(float x){return 1;} |
| | | float sigmoid_gradient(float x){return (1-x)*x;} |
| | |
| | | #include "mini_blas.h" |
| | | |
| | | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) |
| | | inline void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY) |
| | | { |
| | | int i; |
| | | for(i = 0; i < N; ++i) Y[i*INCY] += ALPHA*X[i*INCX]; |
| | | } |
| | | |
| | | void scal_cpu(int N, float ALPHA, float *X, int INCX) |
| | | inline void scal_cpu(int N, float ALPHA, float *X, int INCX) |
| | | { |
| | | int i; |
| | | for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA; |
| | | } |
| | | |
| | | inline void copy_cpu(int N, float *X, int INCX, float *Y, int INCY) |
| | | { |
| | | int i; |
| | | for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX]; |
| | | } |
| | | |
| | | inline float dot_cpu(int N, float *X, int INCX, float *Y, int INCY) |
| | | { |
| | | int i; |
| | | float dot = 0; |
| | | for(i = 0; i < N; ++i) dot += X[i*INCX] * Y[i*INCY]; |
| | | return dot; |
| | | } |
| | | |
| | | #ifdef GPU |
| | | #include "opencl.h" |
| | | |
| | | cl_kernel get_axpy_kernel() |
| | | { |
| | | static int init = 0; |
| | | static cl_kernel kernel; |
| | | if(!init){ |
| | | kernel = get_kernel("src/axpy.cl", "axpy", 0); |
| | | init = 1; |
| | | } |
| | | return kernel; |
| | | } |
| | | |
| | | cl_kernel get_copy_kernel() |
| | | { |
| | | static int init = 0; |
| | | static cl_kernel kernel; |
| | | if(!init){ |
| | | kernel = get_kernel("src/axpy.cl", "copy", 0); |
| | | init = 1; |
| | | } |
| | | return kernel; |
| | | } |
| | | |
| | | cl_kernel get_scal_kernel() |
| | | { |
| | | static int init = 0; |
| | | static cl_kernel kernel; |
| | | if(!init){ |
| | | kernel = get_kernel("src/axpy.cl", "scal", 0); |
| | | init = 1; |
| | | } |
| | | return kernel; |
| | | } |
| | | |
| | | |
| | | void axpy_ongpu(int N, float ALPHA, cl_mem X, int INCX, cl_mem Y, int INCY) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_axpy_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | | cl_uint i = 0; |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(N), (void*) &N); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(ALPHA), (void*) &ALPHA); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(X), (void*) &X); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(INCX), (void*) &INCX); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(Y), (void*) &Y); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(INCY), (void*) &INCY); |
| | | check_error(cl); |
| | | |
| | | const size_t global_size[] = {N}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | |
| | | } |
| | | void copy_ongpu(int N, cl_mem X, int INCX, cl_mem Y, int INCY) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_copy_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | | cl_uint i = 0; |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(N), (void*) &N); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(X), (void*) &X); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(INCX), (void*) &INCX); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(Y), (void*) &Y); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(INCY), (void*) &INCY); |
| | | check_error(cl); |
| | | |
| | | const size_t global_size[] = {N}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | void scal_ongpu(int N, float ALPHA, cl_mem X, int INCX) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel kernel = get_scal_kernel(); |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | | cl_uint i = 0; |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(N), (void*) &N); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(ALPHA), (void*) &ALPHA); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(X), (void*) &X); |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(INCX), (void*) &INCX); |
| | | check_error(cl); |
| | | |
| | | const size_t global_size[] = {N}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | #endif |
| | |
| | | __kernel void axpy(int N, float ALPHA, __global float *X, int INCX, __global float *Y, int INCY) |
| | | { |
| | | int i = get_global_id(0); |
| | | Y[i*INCY] += ALPHA*X[i*INCX]; |
| | | } |
| | | |
| | | __kernel void scal(int N, float ALPHA, __global float *X, int INCX) |
| | | { |
| | | int i = get_global_id(0); |
| | | X[i*INCX] *= ALPHA; |
| | | } |
| | | |
| | | __kernel void copy(int N, __global float *X, int INCX, __global float *Y, int INCY) |
| | | { |
| | | int i = get_global_id(0); |
| | | Y[i*INCY] = X[i*INCX]; |
| | | } |
| | | |
| | |
| | | void test_convolutional_layer() |
| | | { |
| | | int i; |
| | | image dog = load_image("data/dog.jpg",256,256); |
| | | image dog = load_image("data/dog.jpg",224,224); |
| | | network net = parse_network_cfg("cfg/convolutional.cfg"); |
| | | // data test = load_cifar10_data("data/cifar10/test_batch.bin"); |
| | | // float *X = calloc(net.batch*test.X.cols, sizeof(float)); |
| | | // float *y = calloc(net.batch*test.y.cols, sizeof(float)); |
| | | int in_size = get_network_input_size(net)*net.batch; |
| | | int del_size = get_network_output_size_layer(net, 0)*net.batch; |
| | | int size = get_network_output_size(net)*net.batch; |
| | | float *X = calloc(in_size, sizeof(float)); |
| | | float *X = calloc(in_size, sizeof(float)); |
| | | float *y = calloc(size, sizeof(float)); |
| | | for(i = 0; i < in_size; ++i){ |
| | | X[i] = dog.data[i%get_network_input_size(net)]; |
| | | } |
| | | // get_batch(test, net.batch, X, y); |
| | | clock_t start, end; |
| | | cl_mem input_cl = cl_make_array(X, in_size); |
| | | cl_mem truth_cl = cl_make_array(y, size); |
| | | |
| | | forward_network_gpu(net, input_cl, 1); |
| | | forward_network_gpu(net, input_cl, truth_cl, 1); |
| | | start = clock(); |
| | | forward_network_gpu(net, input_cl, 1); |
| | | forward_network_gpu(net, input_cl, truth_cl, 1); |
| | | end = clock(); |
| | | float gpu_sec = (float)(end-start)/CLOCKS_PER_SEC; |
| | | printf("forward gpu: %f sec\n", gpu_sec); |
| | | start = clock(); |
| | | backward_network_gpu(net, input_cl); |
| | | end = clock(); |
| | | gpu_sec = (float)(end-start)/CLOCKS_PER_SEC; |
| | | printf("backward gpu: %f sec\n", gpu_sec); |
| | | //float gpu_cost = get_network_cost(net); |
| | | float *gpu_out = calloc(size, sizeof(float)); |
| | | memcpy(gpu_out, get_network_output(net), size*sizeof(float)); |
| | | |
| | | float *gpu_del = calloc(del_size, sizeof(float)); |
| | | memcpy(gpu_del, get_network_delta_layer(net, 0), del_size*sizeof(float)); |
| | | |
| | | /* |
| | | start = clock(); |
| | | forward_network(net, X, 1); |
| | | forward_network(net, X, y, 1); |
| | | backward_network(net, X); |
| | | float cpu_cost = get_network_cost(net); |
| | | end = clock(); |
| | | float cpu_sec = (float)(end-start)/CLOCKS_PER_SEC; |
| | | float *cpu_out = calloc(size, sizeof(float)); |
| | | memcpy(cpu_out, get_network_output(net), size*sizeof(float)); |
| | | float *cpu_del = calloc(del_size, sizeof(float)); |
| | | memcpy(cpu_del, get_network_delta_layer(net, 0), del_size*sizeof(float)); |
| | | |
| | | float sum = 0; |
| | | for(i = 0; i < size; ++i) { |
| | | //printf("%f, %f\n", gpu_out[i], cpu_out[i]); |
| | | sum += pow(gpu_out[i] - cpu_out[i], 2); |
| | | float del_sum = 0; |
| | | for(i = 0; i < size; ++i) sum += pow(gpu_out[i] - cpu_out[i], 2); |
| | | for(i = 0; i < del_size; ++i) { |
| | | //printf("%f %f\n", cpu_del[i], gpu_del[i]); |
| | | del_sum += pow(cpu_del[i] - gpu_del[i], 2); |
| | | } |
| | | printf("gpu: %f sec, cpu: %f sec, diff: %f, size: %d\n", gpu_sec, cpu_sec, sum, size); |
| | | printf("GPU cost: %f, CPU cost: %f\n", gpu_cost, cpu_cost); |
| | | printf("gpu: %f sec, cpu: %f sec, diff: %f, delta diff: %f, size: %d\n", gpu_sec, cpu_sec, sum, del_sum, size); |
| | | */ |
| | | } |
| | | |
| | | void test_col2im() |
| | | { |
| | | float col[] = {1,2,1,2, |
| | | 1,2,1,2, |
| | | 1,2,1,2, |
| | | 1,2,1,2, |
| | | 1,2,1,2, |
| | | 1,2,1,2, |
| | | 1,2,1,2, |
| | | 1,2,1,2, |
| | | 1,2,1,2}; |
| | | float im[16] = {0}; |
| | | int batch = 1; |
| | | int channels = 1; |
| | | int height=4; |
| | | int width=4; |
| | | int ksize = 3; |
| | | int stride = 1; |
| | | int pad = 0; |
| | | col2im_gpu(col, batch, |
| | | channels, height, width, |
| | | ksize, stride, pad, im); |
| | | int i; |
| | | for(i = 0; i < 16; ++i)printf("%f,", im[i]); |
| | | printf("\n"); |
| | | /* |
| | | float data_im[] = { |
| | | 1,2,3,4, |
| | | 5,6,7,8, |
| | | 9,10,11,12 |
| | | }; |
| | | float data_col[18] = {0}; |
| | | im2col_cpu(data_im, batch, |
| | | channels, height, width, |
| | | ksize, stride, pad, data_col) ; |
| | | for(i = 0; i < 18; ++i)printf("%f,", data_col[i]); |
| | | printf("\n"); |
| | | */ |
| | | } |
| | | |
| | | #endif |
| | |
| | | normalize_data_rows(test); |
| | | for(j = 0; j < test.X.rows; ++j){ |
| | | float *x = test.X.vals[j]; |
| | | forward_network(net, x, 0); |
| | | forward_network(net, x, 0, 0); |
| | | int class = get_predicted_class_network(net); |
| | | fprintf(fp, "%d\n", class); |
| | | } |
| | |
| | | |
| | | void test_cifar10() |
| | | { |
| | | |
| | | network net = parse_network_cfg("cfg/cifar10_part5.cfg"); |
| | | data test = load_cifar10_data("data/cifar10/test_batch.bin"); |
| | | clock_t start = clock(), end; |
| | |
| | | int index = rand()%m.rows; |
| | | //image p = float_to_image(1690,1,1,m.vals[index]); |
| | | //normalize_image(p); |
| | | forward_network(net, m.vals[index], 1); |
| | | forward_network(net, m.vals[index], 0, 1); |
| | | float *out = get_network_output(net); |
| | | float *delta = get_network_delta(net); |
| | | //printf("%f\n", out[0]); |
| | |
| | | matrix test = csv_to_matrix("test.csv"); |
| | | truth = pop_column(&test, 0); |
| | | for(i = 0; i < test.rows; ++i){ |
| | | forward_network(net, test.vals[i], 0); |
| | | forward_network(net, test.vals[i],0, 0); |
| | | float *out = get_network_output(net); |
| | | if(fabs(out[0]) < .5) fprintf(fp, "0\n"); |
| | | else fprintf(fp, "1\n"); |
| | |
| | | //normalize_array(im.data, im.h*im.w*im.c); |
| | | translate_image(im, -144); |
| | | resize_network(net, im.h, im.w, im.c); |
| | | forward_network(net, im.data, 0); |
| | | forward_network(net, im.data, 0, 0); |
| | | image out = get_network_image(net); |
| | | free_image(im); |
| | | cvReleaseImage(&sized); |
| | |
| | | resize_network(net, im.h, im.w, im.c); |
| | | //scale_image(im, 1./255); |
| | | translate_image(im, -144); |
| | | forward_network(net, im.data, 0); |
| | | forward_network(net, im.data, 0, 0); |
| | | image out = get_network_image(net); |
| | | |
| | | int dh = (im.h - h)/(out.h-1); |
| | |
| | | image im = load_image(image_path, 0, 0); |
| | | printf("Processing %dx%d image\n", im.h, im.w); |
| | | resize_network(net, im.h, im.w, im.c); |
| | | forward_network(net, im.data, 0); |
| | | forward_network(net, im.data, 0, 0); |
| | | image out = get_network_image(net); |
| | | |
| | | int dh = (im.h - h)/h; |
| | |
| | | image im = load_image("data/cat.png", 0, 0); |
| | | printf("Processing %dx%d image\n", im.h, im.w); |
| | | resize_network(net, im.h, im.w, im.c); |
| | | forward_network(net, im.data, 0); |
| | | forward_network(net, im.data, 0, 0); |
| | | |
| | | visualize_network(net); |
| | | cvWaitKey(0); |
| | |
| | | //test_ensemble(); |
| | | //test_nist_single(); |
| | | //test_nist(); |
| | | train_nist(); |
| | | //test_convolutional_layer(); |
| | | //train_nist(); |
| | | test_convolutional_layer(); |
| | | //test_col2im(); |
| | | //test_cifar10(); |
| | | //train_cifar10(); |
| | | //test_vince(); |
| | |
| | | cl.error = clSetKernelArg(kernel, i++, sizeof(data_im), (void*) &data_im); |
| | | check_error(cl); |
| | | |
| | | size_t global_size = {channels*height*width*batch}; |
| | | size_t global_size = channels*height*width*batch; |
| | | |
| | | clEnqueueNDRangeKernel(queue, kernel, 3, 0, |
| | | global_size, 0, 0, 0, 0); |
| | | clEnqueueNDRangeKernel(queue, kernel, 1, 0, |
| | | &global_size, 0, 0, 0, 0); |
| | | check_error(cl); |
| | | } |
| | | |
| | | void col2im_gpu(float *data_col, int batch, |
| | | int channels, int height, int width, |
| | | int ksize, int stride, int pad, float *data_im) |
| | | { |
| | | int height_col = (height - ksize) / stride + 1; |
| | | int width_col = (width - ksize) / stride + 1; |
| | | int channels_col = channels * ksize * ksize; |
| | | |
| | | size_t size = height_col*width_col*channels_col*batch; |
| | | cl_mem col_gpu = cl_make_array(data_col, size); |
| | | size = channels*height*width*batch; |
| | | cl_mem im_gpu = cl_make_array(data_im, size); |
| | | |
| | | col2im_ongpu(col_gpu, batch, channels, height, width, |
| | | ksize, stride, pad, im_gpu); |
| | | |
| | | cl_read_array(im_gpu, data_im, size); |
| | | clReleaseMemObject(col_gpu); |
| | | clReleaseMemObject(im_gpu); |
| | | } |
| | | |
| | | #endif |
| | |
| | | int index(int row, int col) |
| | | __kernel void col2im(__global float *data_col, int batch, |
| | | int channels, int height, int width, |
| | | int ksize, int stride, int pad, __global float *data_im) |
| | | { |
| | | |
| | | } |
| | | |
| | | __kernel void col2im(__global float *data_col, int batch, |
| | | int channels, int height, int width, |
| | | int ksize, int stride, int pad, __global float *data_im) |
| | | { |
| | | int id = get_global_id(0); |
| | | int index = id; |
| | | int w = id%width; |
| | | id /= width; |
| | | int h = id%height; |
| | | id /= height; |
| | | int c = id%channels; |
| | | id /= channels; |
| | | int b = id%batch; |
| | | |
| | | int height_col = (height - ksize) / stride + 1; |
| | | int width_col = (width - ksize) / stride + 1; |
| | | int rows = channels * ksize * ksize; |
| | | if (pad){ |
| | | height_col = 1 + (height-1) / stride; |
| | | width_col = 1 + (width-1) / stride; |
| | | pad = ksize/2; |
| | | } |
| | | |
| | | int id = get_global_id(0); |
| | | int index = id; |
| | | int w = id%width + pad; |
| | | id /= width; |
| | | int h = id%height + pad; |
| | | id /= height; |
| | | int c = id%channels; |
| | | id /= channels; |
| | | int b = id%batch; |
| | | |
| | | int w_start = (w<ksize)?0:(w-ksize)/stride + 1; |
| | | int w_end = w/stride + 1; |
| | | if(width_col < w_end) w_end = width_col; |
| | | |
| | | int h_start = (h<ksize)?0:(h-ksize)/stride+1; |
| | | int h_end = h/stride + 1; |
| | | if(height_col < h_end) h_end = height_col; |
| | | |
| | | int rows = channels * ksize * ksize; |
| | | int cols = height_col*width_col; |
| | | int batch_offset = b*cols*rows; |
| | | int channel_offset = c*cols*ksize*ksize; |
| | | data_col[index] = 0; |
| | | int i,j; |
| | | for(i = 0; i < ksize; ++i){ |
| | | row_offset = i*height_col*width_col; |
| | | for(j = 0; j < ksize; ++j){ |
| | | col_offset = |
| | | int offset = (c*ksize*ksize + h * ksize + w)*height_col*width_col; |
| | | offset += b*cols*rows; |
| | | int h_coeff = (1-stride*ksize*height_col)*width_col; |
| | | int w_coeff = 1-stride*height_col*width_col; |
| | | float val = 0; |
| | | int h_col, w_col; |
| | | for(h_col = h_start; h_col < h_end; ++h_col){ |
| | | for(w_col = w_start; w_col < w_end; ++w_col){ |
| | | val += data_col[offset +h_col*h_coeff + w_col*w_coeff]; |
| | | } |
| | | } |
| | | |
| | | data_col[col_index] = im2col_get_pixel(data_im, height, width, channels, b, im_row, im_col, c_im, pad); |
| | | data_im[index] = val; |
| | | } |
| | |
| | | layer->delta = calloc(batch*outputs, sizeof(float*)); |
| | | |
| | | layer->weight_updates = calloc(inputs*outputs, sizeof(float)); |
| | | layer->weight_adapt = calloc(inputs*outputs, sizeof(float)); |
| | | //layer->weight_adapt = calloc(inputs*outputs, sizeof(float)); |
| | | layer->weight_momentum = calloc(inputs*outputs, sizeof(float)); |
| | | layer->weights = calloc(inputs*outputs, sizeof(float)); |
| | | float scale = 1./inputs; |
| | |
| | | layer->weights[i] = scale*2*(rand_uniform()-.5); |
| | | |
| | | layer->bias_updates = calloc(outputs, sizeof(float)); |
| | | layer->bias_adapt = calloc(outputs, sizeof(float)); |
| | | //layer->bias_adapt = calloc(outputs, sizeof(float)); |
| | | layer->bias_momentum = calloc(outputs, sizeof(float)); |
| | | layer->biases = calloc(outputs, sizeof(float)); |
| | | for(i = 0; i < outputs; ++i) |
| | | for(i = 0; i < outputs; ++i){ |
| | | //layer->biases[i] = rand_normal()*scale + scale; |
| | | layer->biases[i] = 1; |
| | | } |
| | | |
| | | #ifdef GPU |
| | | #endif |
| | | layer->activation = activation; |
| | | return layer; |
| | | } |
| | |
| | | #define CONNECTED_LAYER_H |
| | | |
| | | #include "activations.h" |
| | | #include "opencl.h" |
| | | |
| | | typedef struct{ |
| | | float learning_rate; |
| | |
| | | float *output; |
| | | float *delta; |
| | | |
| | | #ifdef GPU |
| | | cl_mem weights_cl; |
| | | cl_mem biases_cl; |
| | | |
| | | cl_mem weight_updates_cl; |
| | | cl_mem bias_updates_cl; |
| | | |
| | | cl_mem weight_momentum_cl; |
| | | cl_mem bias_momentum_cl; |
| | | |
| | | cl_mem output_cl; |
| | | cl_mem delta_cl; |
| | | #endif |
| | | ACTIVATION activation; |
| | | |
| | | } connected_layer; |
| | |
| | | b = layer.delta; |
| | | c = layer.col_image; |
| | | |
| | | memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | gemm(1,0,m,n,k,1,a,m,b,n,0,c,n); |
| | | b += k*n; |
| | | c += m*n; |
| | | } |
| | | |
| | | memset(delta, 0, layer.batch*layer.h*layer.w*layer.c*sizeof(float)); |
| | | |
| | | col2im_cpu(layer.col_image, layer.batch, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta); |
| | | } |
| | | } |
| | |
| | | clReleaseMemObject(c); |
| | | } |
| | | activate_array_ongpu(layer.output_cl, m*n*layer.batch, layer.activation); |
| | | cl_read_array(layer.output_cl, layer.output, m*n*layer.batch); |
| | | //cl_read_array(layer.output_cl, layer.output, m*n*layer.batch); |
| | | } |
| | | |
| | | void backward_convolutional_layer_gpu(convolutional_layer layer, cl_mem delta_cl) |
| | |
| | | clReleaseMemObject(a); |
| | | clReleaseMemObject(b); |
| | | } |
| | | cl_read_array(layer.filter_updates_cl, layer.filter_updates, m*n); |
| | | cl_read_array(layer.bias_updates_cl, layer.bias_updates, m); |
| | | |
| | | //cl_read_array(layer.delta_cl, layer.delta, m*k*layer.batch); |
| | | |
| | | if(delta_cl){ |
| | | m = layer.size*layer.size*layer.c; |
| | |
| | | convolutional_out_width(layer); |
| | | |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | a = layer.filters_cl; |
| | | b = cl_sub_array(layer.delta_cl, i*k*n, k*n); |
| | | c = cl_sub_array(layer.col_image_cl, i*m*n, m*n); |
| | | cl_mem a = layer.filters_cl; |
| | | cl_mem b = cl_sub_array(layer.delta_cl, i*k*n, k*n); |
| | | cl_mem c = cl_sub_array(layer.col_image_cl, i*m*n, m*n); |
| | | |
| | | gemm_ongpu(1,0,m,n,k,1,a,m,b,n,0,c,n); |
| | | clReleaseMemObject(b); |
| | | clReleaseMemObject(c); |
| | | } |
| | | col2im_gpu(layer.col_image_cl, layer.batch, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta_cl); |
| | | |
| | | scal_ongpu(layer.batch*layer.h*layer.w*layer.c,0,delta_cl, 1); |
| | | col2im_ongpu(layer.col_image_cl, layer.batch, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, delta_cl); |
| | | } |
| | | } |
| | | |
| | | void update_convolutional_layer_gpu(convolutional_layer layer) |
| | | { |
| | | int size = layer.size*layer.size*layer.c*layer.n; |
| | | axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_cl, 1, layer.biases_cl, 1); |
| | | scal_ongpu(layer.n,layer.momentum, layer.bias_updates_cl, 1); |
| | | |
| | | scal_ongpu(size, 1.-layer.learning_rate*layer.decay, layer.filters_cl, 1); |
| | | axpy_ongpu(size, layer.learning_rate, layer.filter_updates_cl, 1, layer.filters_cl, 1); |
| | | scal_ongpu(size, layer.momentum, layer.filter_updates_cl, 1); |
| | | } |
| | | |
| | | |
| | | #endif |
| | | |
| | |
| | | #ifndef CONVOLUTIONAL_LAYER_H |
| | | #define CONVOLUTIONAL_LAYER_H |
| | | |
| | | #ifdef GPU |
| | | #include "opencl.h" |
| | | #endif |
| | | |
| | | #include "image.h" |
| | | #include "activations.h" |
| | | |
| | |
| | | #ifdef GPU |
| | | void forward_convolutional_layer_gpu(convolutional_layer layer, cl_mem in); |
| | | void backward_convolutional_layer_gpu(convolutional_layer layer, cl_mem delta_cl); |
| | | void update_convolutional_layer_gpu(convolutional_layer layer); |
| | | #endif |
| | | |
| | | convolutional_layer *make_convolutional_layer(int batch, int h, int w, int c, int n, int size, int stride, int pad, ACTIVATION activation, float learning_rate, float momentum, float decay); |
| New file |
| | |
| | | #include "cost_layer.h" |
| | | #include "mini_blas.h" |
| | | #include <math.h> |
| | | #include <stdlib.h> |
| | | #include <stdio.h> |
| | | |
| | | cost_layer *make_cost_layer(int batch, int inputs) |
| | | { |
| | | fprintf(stderr, "Cost Layer: %d inputs\n", inputs); |
| | | cost_layer *layer = calloc(1, sizeof(cost_layer)); |
| | | layer->batch = batch; |
| | | layer->inputs = inputs; |
| | | layer->delta = calloc(inputs*batch, sizeof(float)); |
| | | layer->output = calloc(1, sizeof(float)); |
| | | #ifdef GPU |
| | | layer->delta_cl = cl_make_array(layer->delta, inputs*batch); |
| | | #endif |
| | | return layer; |
| | | } |
| | | |
| | | void forward_cost_layer(cost_layer layer, float *input, float *truth) |
| | | { |
| | | if (!truth) return; |
| | | copy_cpu(layer.batch*layer.inputs, truth, 1, layer.delta, 1); |
| | | axpy_cpu(layer.batch*layer.inputs, -1, input, 1, layer.delta, 1); |
| | | *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1); |
| | | } |
| | | |
| | | void backward_cost_layer(const cost_layer layer, float *input, float *delta) |
| | | { |
| | | copy_cpu(layer.batch*layer.inputs, layer.delta, 1, delta, 1); |
| | | } |
| | | |
| | | #ifdef GPU |
| | | void forward_cost_layer_gpu(cost_layer layer, cl_mem input, cl_mem truth) |
| | | { |
| | | if (!truth) return; |
| | | copy_ongpu(layer.batch*layer.inputs, truth, 1, layer.delta_cl, 1); |
| | | axpy_ongpu(layer.batch*layer.inputs, -1, input, 1, layer.delta_cl, 1); |
| | | cl_read_array(layer.delta_cl, layer.delta, layer.batch*layer.inputs); |
| | | *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1); |
| | | } |
| | | |
| | | void backward_cost_layer_gpu(const cost_layer layer, cl_mem input, cl_mem delta) |
| | | { |
| | | copy_ongpu(layer.batch*layer.inputs, layer.delta_cl, 1, delta, 1); |
| | | } |
| | | #endif |
| | | |
| New file |
| | |
| | | #ifndef COST_LAYER_H |
| | | #define COST_LAYER_H |
| | | #include "opencl.h" |
| | | |
| | | typedef struct { |
| | | int inputs; |
| | | int batch; |
| | | float *delta; |
| | | float *output; |
| | | #ifdef GPU |
| | | cl_mem delta_cl; |
| | | #endif |
| | | } cost_layer; |
| | | |
| | | cost_layer *make_cost_layer(int batch, int inputs); |
| | | void forward_cost_layer(const cost_layer layer, float *input, float *truth); |
| | | void backward_cost_layer(const cost_layer layer, float *input, float *delta); |
| | | |
| | | #ifdef GPU |
| | | void forward_cost_layer_gpu(cost_layer layer, cl_mem input, cl_mem truth); |
| | | void backward_cost_layer_gpu(const cost_layer layer, cl_mem input, cl_mem delta); |
| | | #endif |
| | | |
| | | #endif |
| New file |
| | |
| | | #include "freeweight_layer.h" |
| | | #include "stdlib.h" |
| | | #include "stdio.h" |
| | | |
| | | freeweight_layer *make_freeweight_layer(int batch, int inputs) |
| | | { |
| | | fprintf(stderr, "Freeweight Layer: %d inputs\n", inputs); |
| | | freeweight_layer *layer = calloc(1, sizeof(freeweight_layer)); |
| | | layer->inputs = inputs; |
| | | layer->batch = batch; |
| | | return layer; |
| | | } |
| | | |
| | | void forward_freeweight_layer(freeweight_layer layer, float *input) |
| | | { |
| | | int i; |
| | | for(i = 0; i < layer.batch * layer.inputs; ++i){ |
| | | input[i] *= 2.*((float)rand()/RAND_MAX); |
| | | } |
| | | } |
| | | void backward_freeweight_layer(freeweight_layer layer, float *input, float *delta) |
| | | { |
| | | // Don't do shit LULZ |
| | | } |
| New file |
| | |
| | | #ifndef FREEWEIGHT_LAYER_H |
| | | #define FREEWEIGHT_LAYER_H |
| | | |
| | | typedef struct{ |
| | | int batch; |
| | | int inputs; |
| | | } freeweight_layer; |
| | | |
| | | freeweight_layer *make_freeweight_layer(int batch, int inputs); |
| | | |
| | | void forward_freeweight_layer(freeweight_layer layer, float *input); |
| | | void backward_freeweight_layer(freeweight_layer layer, float *input, float *delta); |
| | | |
| | | #endif |
| | |
| | | |
| | | float im2col_get_pixel(__global float *im, int height, int width, int channels, |
| | | int batch, int row, int col, int channel, int pad) |
| | | { |
| | |
| | | void time_random_matrix(int TA, int TB, int m, int k, int n); |
| | | |
| | | #ifdef GPU |
| | | void axpy_ongpu(int N, float ALPHA, cl_mem X, int INCX, cl_mem Y, int INCY); |
| | | void copy_ongpu(int N, cl_mem X, int INCX, cl_mem Y, int INCY); |
| | | void scal_ongpu(int N, float ALPHA, cl_mem X, int INCX); |
| | | void im2col_ongpu(cl_mem data_im, int batch, |
| | | int channels, int height, int width, |
| | | int ksize, int stride, int pad, cl_mem data_col); |
| | | |
| | | void col2im_gpu(float *data_col, int batch, |
| | | int channels, int height, int width, |
| | | int ksize, int stride, int pad, float *data_im); |
| | | void col2im_ongpu(cl_mem data_col, int batch, |
| | | int channels, int height, int width, |
| | | int ksize, int stride, int pad, cl_mem data_im); |
| | |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc); |
| | | void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); |
| | | void scal_cpu(int N, float ALPHA, float *X, int INCX); |
| | | inline void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); |
| | | inline void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); |
| | | inline void scal_cpu(int N, float ALPHA, float *X, int INCX); |
| | | inline float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); |
| | | void test_gpu_blas(); |
| | |
| | | #include "connected_layer.h" |
| | | #include "convolutional_layer.h" |
| | | #include "maxpool_layer.h" |
| | | #include "cost_layer.h" |
| | | #include "normalization_layer.h" |
| | | #include "freeweight_layer.h" |
| | | #include "softmax_layer.h" |
| | | #include "dropout_layer.h" |
| | | |
| | |
| | | } |
| | | |
| | | #ifdef GPU |
| | | void forward_network_gpu(network net, cl_mem input_cl, int train) |
| | | void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train) |
| | | { |
| | | int i; |
| | | for(i = 0; i < net.n; ++i){ |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | forward_convolutional_layer_gpu(layer, input_cl); |
| | | input_cl = layer.output_cl; |
| | | forward_convolutional_layer_gpu(layer, input); |
| | | input = layer.output_cl; |
| | | } |
| | | else if(net.types[i] == COST){ |
| | | cost_layer layer = *(cost_layer *)net.layers[i]; |
| | | forward_cost_layer_gpu(layer, input, truth); |
| | | } |
| | | /* |
| | | else if(net.types[i] == CONNECTED){ |
| | |
| | | } |
| | | } |
| | | |
| | | void backward_network_gpu(network net, cl_mem input) |
| | | { |
| | | int i; |
| | | cl_mem prev_input; |
| | | cl_mem prev_delta; |
| | | for(i = net.n-1; i >= 0; --i){ |
| | | if(i == 0){ |
| | | prev_input = input; |
| | | prev_delta = 0; |
| | | }else{ |
| | | prev_input = get_network_output_cl_layer(net, i-1); |
| | | prev_delta = get_network_delta_cl_layer(net, i-1); |
| | | } |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | backward_convolutional_layer_gpu(layer, prev_delta); |
| | | } |
| | | else if(net.types[i] == COST){ |
| | | cost_layer layer = *(cost_layer *)net.layers[i]; |
| | | backward_cost_layer_gpu(layer, prev_input, prev_delta); |
| | | } |
| | | } |
| | | } |
| | | |
| | | void update_network_gpu(network net) |
| | | { |
| | | int i; |
| | | for(i = 0; i < net.n; ++i){ |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | update_convolutional_layer_gpu(layer); |
| | | } |
| | | else if(net.types[i] == MAXPOOL){ |
| | | //maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | } |
| | | else if(net.types[i] == SOFTMAX){ |
| | | //maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | } |
| | | else if(net.types[i] == NORMALIZATION){ |
| | | //maxpool_layer layer = *(maxpool_layer *)net.layers[i]; |
| | | } |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | update_connected_layer(layer); |
| | | } |
| | | } |
| | | } |
| | | |
| | | cl_mem get_network_output_cl_layer(network net, int i) |
| | | { |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | return layer.output_cl; |
| | | } |
| | | return 0; |
| | | } |
| | | |
| | | cl_mem get_network_delta_cl_layer(network net, int i) |
| | | { |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | return layer.delta_cl; |
| | | } |
| | | return 0; |
| | | } |
| | | |
| | | #endif |
| | | |
| | | void forward_network(network net, float *input, int train) |
| | | void forward_network(network net, float *input, float *truth, int train) |
| | | { |
| | | int i; |
| | | for(i = 0; i < net.n; ++i){ |
| | |
| | | forward_crop_layer(layer, input); |
| | | input = layer.output; |
| | | } |
| | | else if(net.types[i] == COST){ |
| | | cost_layer layer = *(cost_layer *)net.layers[i]; |
| | | forward_cost_layer(layer, input, truth); |
| | | } |
| | | else if(net.types[i] == SOFTMAX){ |
| | | softmax_layer layer = *(softmax_layer *)net.layers[i]; |
| | | forward_softmax_layer(layer, input); |
| | |
| | | dropout_layer layer = *(dropout_layer *)net.layers[i]; |
| | | forward_dropout_layer(layer, input); |
| | | } |
| | | else if(net.types[i] == FREEWEIGHT){ |
| | | if(!train) continue; |
| | | freeweight_layer layer = *(freeweight_layer *)net.layers[i]; |
| | | forward_freeweight_layer(layer, input); |
| | | } |
| | | } |
| | | } |
| | | |
| | |
| | | } |
| | | float *get_network_output(network net) |
| | | { |
| | | return get_network_output_layer(net, net.n-1); |
| | | int i; |
| | | for(i = net.n-1; i > 0; --i) if(net.types[i] != COST) break; |
| | | return get_network_output_layer(net, i); |
| | | } |
| | | |
| | | float *get_network_delta_layer(network net, int i) |
| | |
| | | return 0; |
| | | } |
| | | |
| | | float get_network_cost(network net) |
| | | { |
| | | if(net.types[net.n-1] == COST){ |
| | | return ((cost_layer *)net.layers[net.n-1])->output[0]; |
| | | } |
| | | return 0; |
| | | } |
| | | |
| | | float *get_network_delta(network net) |
| | | { |
| | | return get_network_delta_layer(net, net.n-1); |
| | |
| | | return max_index(out, k); |
| | | } |
| | | |
| | | float backward_network(network net, float *input, float *truth) |
| | | void backward_network(network net, float *input) |
| | | { |
| | | float error = calculate_error_network(net, truth); |
| | | int i; |
| | | float *prev_input; |
| | | float *prev_delta; |
| | |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | backward_connected_layer(layer, prev_input, prev_delta); |
| | | } |
| | | else if(net.types[i] == COST){ |
| | | cost_layer layer = *(cost_layer *)net.layers[i]; |
| | | backward_cost_layer(layer, prev_input, prev_delta); |
| | | } |
| | | } |
| | | return error; |
| | | } |
| | | |
| | | float train_network_datum(network net, float *x, float *y) |
| | | { |
| | | forward_network(net, x, 1); |
| | | forward_network(net, x, y, 1); |
| | | //int class = get_predicted_class_network(net); |
| | | float error = backward_network(net, x, y); |
| | | backward_network(net, x); |
| | | float error = get_network_cost(net); |
| | | update_network(net); |
| | | //return (y[class]?1:0); |
| | | return error; |
| | |
| | | int index = rand()%d.X.rows; |
| | | float *x = d.X.vals[index]; |
| | | float *y = d.y.vals[index]; |
| | | forward_network(net, x, 1); |
| | | sum += backward_network(net, x, y); |
| | | forward_network(net, x, y, 1); |
| | | backward_network(net, x); |
| | | sum += get_network_cost(net); |
| | | } |
| | | update_network(net); |
| | | } |
| | |
| | | else if(net.types[i] == CONNECTED){ |
| | | connected_layer layer = *(connected_layer *)net.layers[i]; |
| | | return layer.outputs; |
| | | } else if(net.types[i] == DROPOUT){ |
| | | } |
| | | else if(net.types[i] == DROPOUT){ |
| | | dropout_layer layer = *(dropout_layer *) net.layers[i]; |
| | | return layer.inputs; |
| | | } |
| | |
| | | |
| | | int get_network_output_size(network net) |
| | | { |
| | | int i = net.n-1; |
| | | int i; |
| | | for(i = net.n-1; i > 0; --i) if(net.types[i] != COST) break; |
| | | return get_network_output_size_layer(net, i); |
| | | } |
| | | |
| | |
| | | |
| | | float *network_predict(network net, float *input) |
| | | { |
| | | forward_network(net, input, 0); |
| | | forward_network(net, input, 0, 0); |
| | | float *out = get_network_output(net); |
| | | return out; |
| | | } |
| | |
| | | SOFTMAX, |
| | | NORMALIZATION, |
| | | DROPOUT, |
| | | CROP |
| | | FREEWEIGHT, |
| | | CROP, |
| | | COST |
| | | } LAYER_TYPE; |
| | | |
| | | typedef struct { |
| | |
| | | } network; |
| | | |
| | | #ifdef GPU |
| | | void forward_network_gpu(network net, cl_mem input, int train); |
| | | void forward_network_gpu(network net, cl_mem input, cl_mem truth, int train); |
| | | void backward_network_gpu(network net, cl_mem input); |
| | | void update_network_gpu(network net); |
| | | cl_mem get_network_output_cl_layer(network net, int i); |
| | | cl_mem get_network_delta_cl_layer(network net, int i); |
| | | #endif |
| | | |
| | | network make_network(int n, int batch); |
| | | void forward_network(network net, float *input, int train); |
| | | float backward_network(network net, float *input, float *truth); |
| | | void forward_network(network net, float *input, float *truth, int train); |
| | | void backward_network(network net, float *input); |
| | | void update_network(network net); |
| | | float train_network_sgd(network net, data d, int n); |
| | | float train_network_batch(network net, data d, int n); |
| | |
| | | void visualize_network(network net); |
| | | int resize_network(network net, int h, int w, int c); |
| | | int get_network_input_size(network net); |
| | | float get_network_cost(network net); |
| | | |
| | | #endif |
| | | |
| | |
| | | #ifdef GPU |
| | | #include "opencl.h" |
| | | #include <stdio.h> |
| | | #include <stdlib.h> |
| | | #include <string.h> |
| | | #include <time.h> |
| | | #include <unistd.h> |
| | | |
| | | #include "opencl.h" |
| | | #include "utils.h" |
| | | |
| | | cl_info cl = {0}; |
| | | |
| | |
| | | char src[64*1024]; |
| | | memset(src, 0, 64*1024); |
| | | FILE *fil=fopen(filename,"r"); |
| | | if(fil == 0) file_error(filename); |
| | | srcsize=fread(src, sizeof src, 1, fil); |
| | | fclose(fil); |
| | | const char *srcptr[]={src}; |
| | |
| | | #include "parser.h" |
| | | #include "activations.h" |
| | | #include "crop_layer.h" |
| | | #include "cost_layer.h" |
| | | #include "convolutional_layer.h" |
| | | #include "connected_layer.h" |
| | | #include "maxpool_layer.h" |
| | | #include "normalization_layer.h" |
| | | #include "softmax_layer.h" |
| | | #include "dropout_layer.h" |
| | | #include "freeweight_layer.h" |
| | | #include "list.h" |
| | | #include "option_list.h" |
| | | #include "utils.h" |
| | |
| | | int is_connected(section *s); |
| | | int is_maxpool(section *s); |
| | | int is_dropout(section *s); |
| | | int is_freeweight(section *s); |
| | | int is_softmax(section *s); |
| | | int is_crop(section *s); |
| | | int is_cost(section *s); |
| | | int is_normalization(section *s); |
| | | list *read_cfg(char *filename); |
| | | |
| | |
| | | return layer; |
| | | } |
| | | |
| | | cost_layer *parse_cost(list *options, network *net, int count) |
| | | { |
| | | int input; |
| | | if(count == 0){ |
| | | input = option_find_int(options, "input",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | | }else{ |
| | | input = get_network_output_size_layer(*net, count-1); |
| | | } |
| | | cost_layer *layer = make_cost_layer(net->batch, input); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | | |
| | | crop_layer *parse_crop(list *options, network *net, int count) |
| | | { |
| | | float learning_rate, momentum, decay; |
| | |
| | | return layer; |
| | | } |
| | | |
| | | freeweight_layer *parse_freeweight(list *options, network *net, int count) |
| | | { |
| | | int input; |
| | | if(count == 0){ |
| | | net->batch = option_find_int(options, "batch",1); |
| | | input = option_find_int(options, "input",1); |
| | | }else{ |
| | | input = get_network_output_size_layer(*net, count-1); |
| | | } |
| | | freeweight_layer *layer = make_freeweight_layer(net->batch,input); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | | |
| | | dropout_layer *parse_dropout(list *options, network *net, int count) |
| | | { |
| | | int input; |
| | |
| | | crop_layer *layer = parse_crop(options, &net, count); |
| | | net.types[count] = CROP; |
| | | net.layers[count] = layer; |
| | | }else if(is_cost(s)){ |
| | | cost_layer *layer = parse_cost(options, &net, count); |
| | | net.types[count] = COST; |
| | | net.layers[count] = layer; |
| | | }else if(is_softmax(s)){ |
| | | softmax_layer *layer = parse_softmax(options, &net, count); |
| | | net.types[count] = SOFTMAX; |
| | |
| | | dropout_layer *layer = parse_dropout(options, &net, count); |
| | | net.types[count] = DROPOUT; |
| | | net.layers[count] = layer; |
| | | }else if(is_freeweight(s)){ |
| | | freeweight_layer *layer = parse_freeweight(options, &net, count); |
| | | net.types[count] = FREEWEIGHT; |
| | | net.layers[count] = layer; |
| | | }else{ |
| | | fprintf(stderr, "Type not recognized: %s\n", s->type); |
| | | } |
| | |
| | | { |
| | | return (strcmp(s->type, "[crop]")==0); |
| | | } |
| | | int is_cost(section *s) |
| | | { |
| | | return (strcmp(s->type, "[cost]")==0); |
| | | } |
| | | int is_convolutional(section *s) |
| | | { |
| | | return (strcmp(s->type, "[conv]")==0 |
| | |
| | | { |
| | | return (strcmp(s->type, "[dropout]")==0); |
| | | } |
| | | int is_freeweight(section *s) |
| | | { |
| | | return (strcmp(s->type, "[freeweight]")==0); |
| | | } |
| | | |
| | | int is_softmax(section *s) |
| | | { |
| | |
| | | for(i = 0; i < l->n*l->c*l->size*l->size; ++i) fprintf(fp, "%g,", l->filters[i]); |
| | | fprintf(fp, "\n\n"); |
| | | } |
| | | |
| | | void print_freeweight_cfg(FILE *fp, freeweight_layer *l, network net, int count) |
| | | { |
| | | fprintf(fp, "[freeweight]\n"); |
| | | if(count == 0){ |
| | | fprintf(fp, "batch=%d\ninput=%d\n",l->batch, l->inputs); |
| | | } |
| | | fprintf(fp, "\n"); |
| | | } |
| | | |
| | | void print_dropout_cfg(FILE *fp, dropout_layer *l, network net, int count) |
| | | { |
| | | fprintf(fp, "[dropout]\n"); |
| | | if(count == 0){ |
| | | fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs); |
| | | } |
| | | fprintf(fp, "probability=%g\n\n", l->probability); |
| | | } |
| | | |
| | | void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count) |
| | | { |
| | | int i; |
| | |
| | | fprintf(fp, "\n"); |
| | | } |
| | | |
| | | void print_cost_cfg(FILE *fp, cost_layer *l, network net, int count) |
| | | { |
| | | fprintf(fp, "[cost]\n"); |
| | | if(count == 0) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs); |
| | | fprintf(fp, "\n"); |
| | | } |
| | | |
| | | |
| | | void save_network(network net, char *filename) |
| | | { |
| | | FILE *fp = fopen(filename, "w"); |
| | |
| | | print_crop_cfg(fp, (crop_layer *)net.layers[i], net, i); |
| | | else if(net.types[i] == MAXPOOL) |
| | | print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i], net, i); |
| | | else if(net.types[i] == FREEWEIGHT) |
| | | print_freeweight_cfg(fp, (freeweight_layer *)net.layers[i], net, i); |
| | | else if(net.types[i] == DROPOUT) |
| | | print_dropout_cfg(fp, (dropout_layer *)net.layers[i], net, i); |
| | | else if(net.types[i] == NORMALIZATION) |
| | | print_normalization_cfg(fp, (normalization_layer *)net.layers[i], net, i); |
| | | else if(net.types[i] == SOFTMAX) |
| | | print_softmax_cfg(fp, (softmax_layer *)net.layers[i], net, i); |
| | | else if(net.types[i] == COST) |
| | | print_cost_cfg(fp, (cost_layer *)net.layers[i], net, i); |
| | | } |
| | | fclose(fp); |
| | | } |