Attempt at visualizing ImageNet Features
9 files modified
5 files added
| | |
| | | COMMON=-Wall `pkg-config --cflags opencv` |
| | | UNAME = $(shell uname) |
| | | ifeq ($(UNAME), Darwin) |
| | | COMMON += -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include |
| | | COMMON+= -isystem /usr/local/Cellar/opencv/2.4.6.1/include/opencv -isystem /usr/local/Cellar/opencv/2.4.6.1/include |
| | | LDFLAGS= -framework OpenCL |
| | | else |
| | | COMMON += -march=native -flto |
| | | COMMON+= -march=native -flto |
| | | LDFLAGS= -lOpenCL |
| | | endif |
| | | CFLAGS= $(COMMON) -Ofast |
| | | #CFLAGS= $(COMMON) -O0 -g |
| | | LDFLAGS=`pkg-config --libs opencv` -lm |
| | | LDFLAGS+=`pkg-config --libs opencv` -lm |
| | | VPATH=./src/ |
| | | EXEC=cnn |
| | | |
| | | OBJ=network.o image.o tests.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o |
| | | OBJ=network.o image.o tests.o connected_layer.o maxpool_layer.o activations.o list.o option_list.o parser.o utils.o data.o matrix.o softmax_layer.o mini_blas.o convolutional_layer.o opencl.o gpu_gemm.o cpu_gemm.o |
| | | |
| | | all: $(EXEC) |
| | | |
| | |
| | | return float_to_image(h,w,c,layer.filters+i*h*w*c); |
| | | } |
| | | |
| | | void visualize_convolutional_layer(convolutional_layer layer, char *window) |
| | | image *weighted_sum_filters(convolutional_layer layer, image *prev_filters) |
| | | { |
| | | int color = 1; |
| | | int border = 1; |
| | | int h,w,c; |
| | | int size = layer.size; |
| | | h = size; |
| | | w = (size + border) * layer.n - border; |
| | | c = layer.c; |
| | | if(c != 3 || !color){ |
| | | h = (h+border)*c - border; |
| | | c = 1; |
| | | image *filters = calloc(layer.n, sizeof(image)); |
| | | int i,j,k,c; |
| | | if(!prev_filters){ |
| | | for(i = 0; i < layer.n; ++i){ |
| | | filters[i] = copy_image(get_convolutional_filter(layer, i)); |
| | | } |
| | | } |
| | | |
| | | image filters = make_image(h,w,c); |
| | | int i,j; |
| | | for(i = 0; i < layer.n; ++i){ |
| | | int w_offset = i*(size+border); |
| | | image k = get_convolutional_filter(layer, i); |
| | | //printf("%f ** ", layer.biases[i]); |
| | | //print_image(k); |
| | | image copy = copy_image(k); |
| | | normalize_image(copy); |
| | | for(j = 0; j < k.c; ++j){ |
| | | //set_pixel(copy,0,0,j,layer.biases[i]); |
| | | } |
| | | if(c == 3 && color){ |
| | | embed_image(copy, filters, 0, w_offset); |
| | | } |
| | | else{ |
| | | for(j = 0; j < k.c; ++j){ |
| | | int h_offset = j*(size+border); |
| | | image layer = get_image_layer(k, j); |
| | | embed_image(layer, filters, h_offset, w_offset); |
| | | free_image(layer); |
| | | else{ |
| | | image base = prev_filters[0]; |
| | | for(i = 0; i < layer.n; ++i){ |
| | | image filter = get_convolutional_filter(layer, i); |
| | | filters[i] = make_image(base.h, base.w, base.c); |
| | | for(j = 0; j < layer.size; ++j){ |
| | | for(k = 0; k < layer.size; ++k){ |
| | | for(c = 0; c < layer.c; ++c){ |
| | | float weight = get_pixel(filter, j, k, c); |
| | | image prev_filter = copy_image(prev_filters[c]); |
| | | scale_image(prev_filter, weight); |
| | | add_into_image(prev_filter, filters[i], 0,0); |
| | | free_image(prev_filter); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | free_image(copy); |
| | | } |
| | | return filters; |
| | | } |
| | | |
| | | image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_filters) |
| | | { |
| | | image *single_filters = weighted_sum_filters(layer, 0); |
| | | show_images(single_filters, layer.n, window); |
| | | |
| | | image delta = get_convolutional_delta(layer); |
| | | image dc = collapse_image_layers(delta, 1); |
| | | char buff[256]; |
| | | sprintf(buff, "%s: Delta", window); |
| | | show_image(dc, buff); |
| | | //show_image(dc, buff); |
| | | free_image(dc); |
| | | show_image(filters, window); |
| | | free_image(filters); |
| | | return single_filters; |
| | | } |
| | | |
| | |
| | | void forward_convolutional_layer(const convolutional_layer layer, float *in); |
| | | void learn_convolutional_layer(convolutional_layer layer); |
| | | void update_convolutional_layer(convolutional_layer layer, float step, float momentum, float decay); |
| | | void visualize_convolutional_layer(convolutional_layer layer, char *window); |
| | | image *visualize_convolutional_layer(convolutional_layer layer, char *window, image *prev_filters); |
| | | |
| | | void backward_convolutional_layer(convolutional_layer layer, float *delta); |
| | | |
| New file |
| | |
| | | #include "mini_blas.h" |
| | | |
| | | void cpu_gemm_nn(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc) |
| | | { |
| | | int i,j,k; |
| | | for(i = 0; i < M; ++i){ |
| | | for(k = 0; k < K; ++k){ |
| | | register float A_PART = ALPHA*A[i*lda+k]; |
| | | for(j = 0; j < N; ++j){ |
| | | C[i*ldc+j] += A_PART*B[k*ldb+j]; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | |
| | | void cpu_gemm_nt(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc) |
| | | { |
| | | int i,j,k; |
| | | for(i = 0; i < M; ++i){ |
| | | for(j = 0; j < N; ++j){ |
| | | register float sum = 0; |
| | | for(k = 0; k < K; ++k){ |
| | | sum += ALPHA*A[i*lda+k]*B[k+j*ldb]; |
| | | } |
| | | C[i*ldc+j] += sum; |
| | | } |
| | | } |
| | | } |
| | | |
| | | void cpu_gemm_tn(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc) |
| | | { |
| | | int i,j,k; |
| | | for(i = 0; i < M; ++i){ |
| | | for(k = 0; k < K; ++k){ |
| | | register float A_PART = ALPHA*A[k*lda+i]; |
| | | for(j = 0; j < N; ++j){ |
| | | C[i*ldc+j] += A_PART*B[k*ldb+j]; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | void cpu_gemm_tt(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc) |
| | | { |
| | | int i,j,k; |
| | | for(i = 0; i < M; ++i){ |
| | | for(j = 0; j < N; ++j){ |
| | | for(k = 0; k < K; ++k){ |
| | | C[i*ldc+j] += ALPHA*A[i+k*lda]*B[k+j*ldb]; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | |
| | | |
| | | void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc) |
| | | { |
| | | // Assume beta = 1 LULZ |
| | | if(!TA && !TB) |
| | | cpu_gemm_nn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); |
| | | else if(TA && !TB) |
| | | cpu_gemm_tn( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); |
| | | else if(!TA && TB) |
| | | cpu_gemm_nt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); |
| | | else |
| | | cpu_gemm_tt( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); |
| | | } |
| New file |
| | |
| | | |
| | | |
| | | __kernel void gemm(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | __global float *A, int lda, |
| | | __global float *B, int ldb, |
| | | float BETA, |
| | | __global float *C, int ldc) |
| | | { |
| | | __local float Asub[BLOCK][BLOCK]; |
| | | __local float Bsub[BLOCK][BLOCK]; |
| | | |
| | | float val = 0; |
| | | |
| | | int row_block = get_group_id(0); |
| | | int col_block = get_group_id(1); |
| | | |
| | | int sub_row = get_local_id(0); |
| | | int sub_col = get_local_id(1); |
| | | |
| | | int row = row_block*BLOCK + sub_row; |
| | | int col = col_block*BLOCK + sub_col; |
| | | |
| | | int i,j; |
| | | for(i = 0; i < K; i += BLOCK){ |
| | | int arow = row_block*BLOCK + sub_row; |
| | | int acol = i + sub_col; |
| | | |
| | | int brow = i + sub_row; |
| | | int bcol = col_block*BLOCK + sub_col; |
| | | |
| | | Asub[sub_row][sub_col] = TA ? A[arow + acol*lda] : A[arow*lda + acol]; |
| | | Bsub[sub_row][sub_col] = TB ? B[brow + bcol*ldb] : B[brow*ldb + bcol]; |
| | | |
| | | barrier(CLK_LOCAL_MEM_FENCE); |
| | | |
| | | for(j = 0; j < BLOCK && i+j<K; ++j){ |
| | | val += Asub[sub_row][j]*Bsub[j][sub_col]; |
| | | } |
| | | barrier(CLK_LOCAL_MEM_FENCE); |
| | | } |
| | | |
| | | if(row < M && col < N){ |
| | | C[row*ldc+col] = val; |
| | | } |
| | | } |
| | | |
| | | /* |
| | | __kernel void gemm_slow(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | __global float *A, int lda, |
| | | __global float *B, int ldb, |
| | | float BETA, |
| | | __global float *C, int ldc) |
| | | { |
| | | float val = 0; |
| | | int row = get_global_id(0); |
| | | int col = get_global_id(1); |
| | | int i; |
| | | for(i = 0; i < K; ++i){ |
| | | float Aval; |
| | | if(TA) Aval = A[i*lda+row]; |
| | | else Aval = A[row*lda+i]; |
| | | |
| | | float Bval; |
| | | if(TB) Bval = B[col*ldb+i]; |
| | | else Bval = B[col+i*ldb]; |
| | | |
| | | val += Aval*Bval; |
| | | } |
| | | C[row*ldc+col] = val; |
| | | } |
| | | |
| | | */ |
| New file |
| | |
| | | #include <stdio.h> |
| | | #include <stdlib.h> |
| | | #include <string.h> |
| | | #include <time.h> |
| | | #include <math.h> |
| | | |
| | | #include "opencl.h" |
| | | #include "mini_blas.h" |
| | | |
| | | #define STR_HELPER(x) #x |
| | | #define STR(x) STR_HELPER(x) |
| | | |
| | | #define BLOCK 8 |
| | | |
| | | cl_kernel get_gemm_kernel() |
| | | { |
| | | static int init = 0; |
| | | static cl_kernel gemm_kernel; |
| | | if(!init){ |
| | | gemm_kernel = get_kernel("src/gemm.cl", "gemm", "-D BLOCK=" STR(BLOCK) ); |
| | | init = 1; |
| | | } |
| | | return gemm_kernel; |
| | | } |
| | | |
| | | void gpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel gemm_kernel = get_gemm_kernel(); |
| | | cl_context context = cl.context; |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | | size_t size = sizeof(float)*(TA ? lda*K:lda*M); |
| | | cl_mem A_gpu = clCreateBuffer(context, |
| | | CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, |
| | | size, A, &cl.error); |
| | | check_error(cl); |
| | | |
| | | size = sizeof(float)*(TB ? ldb*N:ldb*K); |
| | | cl_mem B_gpu = clCreateBuffer(context, |
| | | CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, |
| | | size, B, &cl.error); |
| | | check_error(cl); |
| | | |
| | | size = sizeof(float)*(ldc*M); |
| | | cl_mem C_gpu = clCreateBuffer(context, |
| | | CL_MEM_WRITE_ONLY|CL_MEM_COPY_HOST_PTR, |
| | | size, C, &cl.error); |
| | | check_error(cl); |
| | | |
| | | cl_uint i = 0; |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(TA), (void*) &TA); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(TB), (void*) &TB); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(M), (void*) &M); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(N), (void*) &N); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(K), (void*) &K); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(ALPHA), (void*) &ALPHA); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(A_gpu), (void*) &A_gpu); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(lda), (void*) &lda); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(B_gpu), (void*) &B_gpu); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(ldb), (void*) &ldb); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(BETA), (void*) &BETA); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(C_gpu), (void*) &C_gpu); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(ldc), (void*) &ldc); |
| | | check_error(cl); |
| | | |
| | | const size_t global_size[] = {ceil((float)M/BLOCK)*BLOCK, ceil((float)N/BLOCK)*BLOCK}; |
| | | const size_t local_size[] = {BLOCK, BLOCK}; |
| | | //printf("%zd %zd %zd %zd\n", global_size[0], global_size[1], local_size[0], local_size[1]); |
| | | |
| | | clEnqueueNDRangeKernel(queue, gemm_kernel, 2, 0, global_size, local_size, 0, 0, 0); |
| | | check_error(cl); |
| | | clEnqueueReadBuffer(queue, C_gpu, CL_TRUE, 0, size, C, 0, 0, 0); |
| | | check_error(cl); |
| | | |
| | | clReleaseMemObject(A_gpu); |
| | | clReleaseMemObject(B_gpu); |
| | | clReleaseMemObject(C_gpu); |
| | | |
| | | } |
| | | |
| | | /* |
| | | cl_kernel get_gemm_kernel_slow() |
| | | { |
| | | static int init = 0; |
| | | static cl_kernel gemm_kernel; |
| | | if(!init){ |
| | | gemm_kernel = get_kernel("src/gemm.cl", "gemm_slow"); |
| | | init = 1; |
| | | } |
| | | return gemm_kernel; |
| | | } |
| | | |
| | | void gpu_gemm_slow(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc) |
| | | { |
| | | cl_setup(); |
| | | cl_kernel gemm_kernel = get_gemm_kernel_slow(); |
| | | cl_context context = cl.context; |
| | | cl_command_queue queue = cl.queue; |
| | | |
| | | size_t size = sizeof(float)*(TA ? lda*K:lda*M); |
| | | cl_mem A_gpu = clCreateBuffer(context, |
| | | CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, |
| | | size, A, &cl.error); |
| | | check_error(cl); |
| | | |
| | | size = sizeof(float)*(TB ? ldb*N:ldb*K); |
| | | cl_mem B_gpu = clCreateBuffer(context, |
| | | CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, |
| | | size, B, &cl.error); |
| | | check_error(cl); |
| | | |
| | | size = sizeof(float)*(ldc*M); |
| | | cl_mem C_gpu = clCreateBuffer(context, |
| | | CL_MEM_READ_ONLY|CL_MEM_COPY_HOST_PTR, |
| | | size, C, &cl.error); |
| | | check_error(cl); |
| | | |
| | | cl_uint i = 0; |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(TA), (void*) &TA); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(TB), (void*) &TB); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(M), (void*) &M); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(N), (void*) &N); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(K), (void*) &K); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(ALPHA), (void*) &ALPHA); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(A_gpu), (void*) &A_gpu); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(lda), (void*) &lda); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(B_gpu), (void*) &B_gpu); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(ldb), (void*) &ldb); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(BETA), (void*) &BETA); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(C_gpu), (void*) &C_gpu); |
| | | cl.error = clSetKernelArg(gemm_kernel, i++, sizeof(ldc), (void*) &ldc); |
| | | check_error(cl); |
| | | |
| | | const size_t global_size[] = {M, N}; |
| | | |
| | | clEnqueueNDRangeKernel(queue, gemm_kernel, 2, 0, global_size, 0, 0, 0, 0); |
| | | clEnqueueReadBuffer(queue, C_gpu, CL_TRUE, 0, size, C, 0, 0, 0); |
| | | |
| | | clReleaseMemObject(A_gpu); |
| | | clReleaseMemObject(B_gpu); |
| | | clReleaseMemObject(C_gpu); |
| | | |
| | | } |
| | | */ |
| | |
| | | return copy; |
| | | } |
| | | |
| | | |
| | | void show_image(image p, char *name) |
| | | { |
| | | int i,j,k; |
| | |
| | | cvReleaseImage(&disp); |
| | | } |
| | | |
| | | void save_image(image p, char *name) |
| | | { |
| | | int i,j,k; |
| | | image copy = copy_image(p); |
| | | normalize_image(copy); |
| | | |
| | | char buff[256]; |
| | | //sprintf(buff, "%s (%d)", name, windows); |
| | | sprintf(buff, "%s.png", name); |
| | | |
| | | IplImage *disp = cvCreateImage(cvSize(p.w,p.h), IPL_DEPTH_8U, p.c); |
| | | int step = disp->widthStep; |
| | | for(i = 0; i < p.h; ++i){ |
| | | for(j = 0; j < p.w; ++j){ |
| | | for(k= 0; k < p.c; ++k){ |
| | | disp->imageData[i*step + j*p.c + k] = (unsigned char)(get_pixel(copy,i,j,k)*255); |
| | | } |
| | | } |
| | | } |
| | | free_image(copy); |
| | | cvSaveImage(buff, disp,0); |
| | | cvReleaseImage(&disp); |
| | | } |
| | | |
| | | void show_image_layers(image p, char *name) |
| | | { |
| | | int i; |
| | |
| | | return out; |
| | | } |
| | | |
| | | void add_into_image(image src, image dest, int h, int w) |
| | | { |
| | | int i,j,k; |
| | | for(k = 0; k < src.c; ++k){ |
| | | for(i = 0; i < src.h; ++i){ |
| | | for(j = 0; j < src.w; ++j){ |
| | | add_pixel(dest, h+i, w+j, k, get_pixel(src, i, j, k)); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | |
| | | void add_scalar_image(image m, float s) |
| | | { |
| | | int i; |
| | |
| | | } |
| | | return out; |
| | | } |
| | | image get_sub_image(image m, int h, int w, int dh, int dw) |
| | | { |
| | | image out = make_image(dh, dw, m.c); |
| | | int i,j,k; |
| | | for(k = 0; k < out.c; ++k){ |
| | | for(i = 0; i < dh; ++i){ |
| | | for(j = 0; j < dw; ++j){ |
| | | float val = get_pixel(m, h+i, w+j, k); |
| | | set_pixel(out, i, j, k, val); |
| | | } |
| | | } |
| | | } |
| | | return out; |
| | | } |
| | | |
| | | float get_pixel(image m, int x, int y, int c) |
| | | { |
| | |
| | | printf("\n"); |
| | | } |
| | | |
| | | image collapse_images(image *ims, int n) |
| | | { |
| | | int color = 1; |
| | | int border = 1; |
| | | int h,w,c; |
| | | int size = ims[0].h; |
| | | h = size; |
| | | w = (size + border) * n - border; |
| | | c = ims[0].c; |
| | | if(c != 3 || !color){ |
| | | h = (h+border)*c - border; |
| | | c = 1; |
| | | } |
| | | |
| | | image filters = make_image(h,w,c); |
| | | int i,j; |
| | | for(i = 0; i < n; ++i){ |
| | | int w_offset = i*(size+border); |
| | | image copy = copy_image(ims[i]); |
| | | normalize_image(copy); |
| | | if(c == 3 && color){ |
| | | embed_image(copy, filters, 0, w_offset); |
| | | } |
| | | else{ |
| | | for(j = 0; j < copy.c; ++j){ |
| | | int h_offset = j*(size+border); |
| | | image layer = get_image_layer(copy, j); |
| | | embed_image(layer, filters, h_offset, w_offset); |
| | | free_image(layer); |
| | | } |
| | | } |
| | | free_image(copy); |
| | | } |
| | | return filters; |
| | | } |
| | | |
| | | void show_images(image *ims, int n, char *window) |
| | | { |
| | | image m = collapse_images(ims, n); |
| | | show_image(m, window); |
| | | free_image(m); |
| | | } |
| | | |
| | | void free_image(image m) |
| | | { |
| | | free(m.data); |
| | |
| | | void subtract_image(image a, image b); |
| | | float avg_image_layer(image m, int l); |
| | | void embed_image(image source, image dest, int h, int w); |
| | | void add_into_image(image src, image dest, int h, int w); |
| | | image collapse_image_layers(image source, int border); |
| | | image get_sub_image(image m, int h, int w, int dh, int dw); |
| | | |
| | | void show_image(image p, char *name); |
| | | void save_image(image p, char *name); |
| | | void show_images(image *ims, int n, char *window); |
| | | void show_image_layers(image p, char *name); |
| | | void show_image_collapsed(image p, char *name); |
| | | void print_image(image m); |
| | |
| | | |
| | | float get_pixel(image m, int x, int y, int c); |
| | | float get_pixel_extend(image m, int x, int y, int c); |
| | | void add_pixel(image m, int x, int y, int c, float val); |
| | | void set_pixel(image m, int x, int y, int c, float val); |
| | | |
| | | image get_image_layer(image m, int l); |
| | |
| | | #include <stdio.h> |
| | | #include <math.h> |
| | | #include <time.h> |
| | | #include <string.h> |
| | | #include "mini_blas.h" |
| | | |
| | | void pm(int M, int N, float *A) |
| | | { |
| | |
| | | } |
| | | |
| | | void gemm(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc) |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc) |
| | | { |
| | | // Assume beta = 1 LULZ |
| | | int i,j,k; |
| | | if(TB && !TA){ |
| | | for(i = 0; i < M; ++i){ |
| | | for(j = 0; j < N; ++j){ |
| | | register float sum = 0; |
| | | for(k = 0; k < K; ++k){ |
| | | sum += ALPHA*A[i*lda+k]*B[k+j*ldb]; |
| | | } |
| | | C[i*ldc+j] += sum; |
| | | } |
| | | } |
| | | }else if(TA && !TB){ |
| | | for(i = 0; i < M; ++i){ |
| | | for(k = 0; k < K; ++k){ |
| | | register float A_PART = ALPHA*A[k*lda+i]; |
| | | for(j = 0; j < N; ++j){ |
| | | C[i*ldc+j] += A_PART*B[k*ldb+j]; |
| | | } |
| | | } |
| | | } |
| | | }else{ |
| | | for(i = 0; i < M; ++i){ |
| | | for(k = 0; k < K; ++k){ |
| | | register float A_PART = ALPHA*A[i*lda+k]; |
| | | for(j = 0; j < N; ++j){ |
| | | C[i*ldc+j] += A_PART*B[k*ldb+j]; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | gpu_gemm( TA, TB, M, N, K, ALPHA,A,lda, B, ldb,BETA,C,ldc); |
| | | } |
| | | |
| | | void im2row(float *image, int h, int w, int c, int size, int stride, float *matrix) |
| | |
| | | |
| | | void time_random_matrix(int TA, int TB, int m, int k, int n) |
| | | { |
| | | float *a = random_matrix(m,k); |
| | | float *b = random_matrix(k,n); |
| | | float *a; |
| | | if(!TA) a = random_matrix(m,k); |
| | | else a = random_matrix(k,m); |
| | | int lda = (!TA)?k:m; |
| | | float *b; |
| | | if(!TB) b = random_matrix(k,n); |
| | | else b = random_matrix(n,k); |
| | | int ldb = (!TB)?n:k; |
| | | |
| | | float *c = random_matrix(m,n); |
| | | int i; |
| | | clock_t start = clock(), end; |
| | | for(i = 0; i<1000; ++i){ |
| | | gemm(TA,TB,m,n,k,1,a,k,b,n,1,c,n); |
| | | cpu_gemm(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); |
| | | } |
| | | end = clock(); |
| | | printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); |
| | | free(a); |
| | | free(b); |
| | | free(c); |
| | | } |
| | | |
| | | void test_blas() |
| | |
| | | time_random_matrix(0,0,100,100,100); |
| | | time_random_matrix(1,0,100,100,100); |
| | | time_random_matrix(0,1,100,100,100); |
| | | time_random_matrix(1,1,100,100,100); |
| | | |
| | | time_random_matrix(0,1,1000,100,100); |
| | | time_random_matrix(0,0,1000,100,100); |
| | | time_random_matrix(1,0,1000,100,100); |
| | | time_random_matrix(0,1,1000,100,100); |
| | | time_random_matrix(1,1,1000,100,100); |
| | | |
| | | |
| | | } |
| | | |
| | | void time_gpu_random_matrix(int TA, int TB, int m, int k, int n) |
| | | { |
| | | float *a; |
| | | if(!TA) a = random_matrix(m,k); |
| | | else a = random_matrix(k,m); |
| | | int lda = (!TA)?k:m; |
| | | float *b; |
| | | if(!TB) b = random_matrix(k,n); |
| | | else b = random_matrix(n,k); |
| | | int ldb = (!TB)?n:k; |
| | | |
| | | float *c = random_matrix(m,n); |
| | | int i; |
| | | clock_t start = clock(), end; |
| | | for(i = 0; i<1000; ++i){ |
| | | gpu_gemm(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); |
| | | } |
| | | end = clock(); |
| | | printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %lf ms\n",m,k,k,n, TA, TB, (float)(end-start)/CLOCKS_PER_SEC); |
| | | free(a); |
| | | free(b); |
| | | free(c); |
| | | } |
| | | |
| | | void test_gpu_accuracy(int TA, int TB, int m, int k, int n) |
| | | { |
| | | srand(0); |
| | | float *a; |
| | | if(!TA) a = random_matrix(m,k); |
| | | else a = random_matrix(k,m); |
| | | int lda = (!TA)?k:m; |
| | | float *b; |
| | | if(!TB) b = random_matrix(k,n); |
| | | else b = random_matrix(n,k); |
| | | int ldb = (!TB)?n:k; |
| | | |
| | | float *c = random_matrix(m,n); |
| | | float *c_gpu = random_matrix(m,n); |
| | | memset(c, 0, m*n*sizeof(float)); |
| | | memset(c_gpu, 0, m*n*sizeof(float)); |
| | | int i; |
| | | //pm(m,k,b); |
| | | gpu_gemm(TA,TB,m,n,k,1,a,lda,b,ldb,1,c_gpu,n); |
| | | //pm(m, n, c_gpu); |
| | | cpu_gemm(TA,TB,m,n,k,1,a,lda,b,ldb,1,c,n); |
| | | //pm(m, n, c); |
| | | double sse = 0; |
| | | for(i = 0; i < m*n; ++i) { |
| | | //printf("%f %f\n", c[i], c_gpu[i]); |
| | | sse += pow(c[i]-c_gpu[i], 2); |
| | | } |
| | | printf("Matrix Multiplication %dx%d * %dx%d, TA=%d, TB=%d: %g MSE\n",m,k,k,n, TA, TB, sse/(m*n)); |
| | | free(a); |
| | | free(b); |
| | | free(c); |
| | | } |
| | | |
| | | void test_gpu_blas() |
| | | { |
| | | test_gpu_accuracy(0,0,17,10,10); |
| | | test_gpu_accuracy(1,0,17,10,10); |
| | | test_gpu_accuracy(0,1,17,10,10); |
| | | test_gpu_accuracy(1,1,17,10,10); |
| | | |
| | | test_gpu_accuracy(0,0,1000,10,100); |
| | | test_gpu_accuracy(1,0,1000,10,100); |
| | | test_gpu_accuracy(0,1,1000,10,100); |
| | | test_gpu_accuracy(1,1,1000,10,100); |
| | | |
| | | time_gpu_random_matrix(0,0,1000,1000,100); |
| | | time_random_matrix(0,0,1000,1000,100); |
| | | |
| | | time_gpu_random_matrix(0,1,1000,1000,100); |
| | | time_random_matrix(0,1,1000,1000,100); |
| | | |
| | | time_gpu_random_matrix(1,0,1000,1000,100); |
| | | time_random_matrix(1,0,1000,1000,100); |
| | | |
| | | time_gpu_random_matrix(1,1,1000,1000,100); |
| | | time_random_matrix(1,1,1000,1000,100); |
| | | |
| | | } |
| | | |
| | | |
| | |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc); |
| | | float *random_matrix(int rows, int cols); |
| | | void im2row(float *image, int h, int w, int c, int size, int stride, float *matrix); |
| | | void im2col(float *image, int h, int w, int c, int size, int stride, float *matrix); |
| | | void im2col_cpu(float* data_im, const int channels, |
| | |
| | | const int height, const int width, const int ksize, const int stride, |
| | | float* data_im); |
| | | void test_blas(); |
| | | |
| | | void gpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc); |
| | | void cpu_gemm(int TA, int TB, int M, int N, int K, float ALPHA, |
| | | float *A, int lda, |
| | | float *B, int ldb, |
| | | float BETA, |
| | | float *C, int ldc); |
| | | void test_gpu_blas(); |
| | |
| | | |
| | | void visualize_network(network net) |
| | | { |
| | | image *prev = 0; |
| | | int i; |
| | | char buff[256]; |
| | | for(i = 0; i < net.n; ++i){ |
| | | sprintf(buff, "Layer %d", i); |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | convolutional_layer layer = *(convolutional_layer *)net.layers[i]; |
| | | visualize_convolutional_layer(layer, buff); |
| | | prev = visualize_convolutional_layer(layer, buff, prev); |
| | | } |
| | | } |
| | | } |
| | |
| | | return acc; |
| | | } |
| | | |
| | | |
| New file |
| | |
| | | #include "opencl.h" |
| | | #include <stdio.h> |
| | | #include <stdlib.h> |
| | | #include <string.h> |
| | | |
| | | cl_info cl = {0}; |
| | | |
| | | void check_error(cl_info info) |
| | | { |
| | | if (info.error != CL_SUCCESS) { |
| | | printf("\n Error number %d", info.error); |
| | | } |
| | | } |
| | | |
| | | cl_info cl_init() |
| | | { |
| | | cl_info info; |
| | | info.initialized = 0; |
| | | cl_uint platforms, devices; |
| | | // Fetch the Platform and Device IDs; we only want one. |
| | | info.error=clGetPlatformIDs(1, &info.platform, &platforms); |
| | | check_error(info); |
| | | info.error=clGetDeviceIDs(info.platform, CL_DEVICE_TYPE_ALL, 1, &info.device, &devices); |
| | | check_error(info); |
| | | |
| | | cl_context_properties properties[]={ |
| | | CL_CONTEXT_PLATFORM, (cl_context_properties)info.platform, |
| | | 0}; |
| | | // Note that nVidia's OpenCL requires the platform property |
| | | info.context=clCreateContext(properties, 1, &info.device, 0, 0, &info.error); |
| | | check_error(info); |
| | | info.queue = clCreateCommandQueue(info.context, info.device, 0, &info.error); |
| | | check_error(info); |
| | | info.initialized = 1; |
| | | return info; |
| | | } |
| | | |
| | | cl_program cl_fprog(char *filename, char *options, cl_info info) |
| | | { |
| | | size_t srcsize; |
| | | char src[8192]; |
| | | memset(src, 0, 8192); |
| | | FILE *fil=fopen(filename,"r"); |
| | | srcsize=fread(src, sizeof src, 1, fil); |
| | | fclose(fil); |
| | | const char *srcptr[]={src}; |
| | | // Submit the source code of the example kernel to OpenCL |
| | | cl_program prog=clCreateProgramWithSource(info.context,1, srcptr, &srcsize, &info.error); |
| | | check_error(info); |
| | | char build_c[4096]; |
| | | // and compile it (after this we could extract the compiled version) |
| | | info.error=clBuildProgram(prog, 0, 0, options, 0, 0); |
| | | if ( info.error != CL_SUCCESS ) { |
| | | fprintf(stderr, "Error Building Program: %d\n", info.error); |
| | | clGetProgramBuildInfo( prog, info.device, CL_PROGRAM_BUILD_LOG, 4096, build_c, 0); |
| | | fprintf(stderr, "Build Log for %s program:\n%s\n", filename, build_c); |
| | | } |
| | | return prog; |
| | | } |
| | | |
| | | void cl_setup() |
| | | { |
| | | if(!cl.initialized){ |
| | | cl = cl_init(); |
| | | } |
| | | } |
| | | |
| | | cl_kernel get_kernel(char *filename, char *kernelname, char *options) |
| | | { |
| | | cl_setup(); |
| | | cl_program prog = cl_fprog(filename, options, cl); |
| | | cl_kernel kernel=clCreateKernel(prog, kernelname, &cl.error); |
| | | check_error(cl); |
| | | return kernel; |
| | | } |
| | | |
| | | |
| New file |
| | |
| | | #ifdef __APPLE__ |
| | | #include <OpenCL/opencl.h> |
| | | #else |
| | | #include <CL/cl.h> |
| | | #endif |
| | | |
| | | typedef struct { |
| | | int initialized; |
| | | cl_int error; |
| | | cl_platform_id platform; |
| | | cl_device_id device; |
| | | cl_context context; |
| | | cl_command_queue queue; |
| | | }cl_info; |
| | | |
| | | extern cl_info cl; |
| | | |
| | | void cl_setup(); |
| | | void check_error(cl_info info); |
| | | cl_kernel get_kernel(char *filename, char *kernelname, char *options); |
| | | |
| | |
| | | //lr *= .99; |
| | | } |
| | | } |
| | | |
| | | void test_visualize() |
| | | { |
| | | network net = parse_network_cfg("cfg/imagenet.cfg"); |
| | | srand(2222222); |
| | | visualize_network(net); |
| | | cvWaitKey(0); |
| | | } |
| | | void test_full() |
| | | { |
| | | network net = parse_network_cfg("cfg/backup_1300.cfg"); |
| | |
| | | scale_data_rows(train, 1./255); |
| | | train_network_sgd(net, train, batch, lr, momentum, decay); |
| | | //printf("%5f %5f\n",(double)count*batch/train.X.rows, loss); |
| | | |
| | | |
| | | float test_acc = network_accuracy(net, test); |
| | | printf("%5f %5f\n",(double)count*batch/train.X.rows/5, 1-test_acc); |
| | | free_data(train); |
| | |
| | | //printf("Time: %lf seconds\n", (float)(end-start)/CLOCKS_PER_SEC); |
| | | //start=end; |
| | | /* |
| | | if(count%5 == 0){ |
| | | float train_acc = network_accuracy(net, train); |
| | | fprintf(stderr, "\nTRAIN: %f\n", train_acc); |
| | | float test_acc = network_accuracy(net, test); |
| | | fprintf(stderr, "TEST: %f\n\n", test_acc); |
| | | printf("%d, %f, %f\n", count, train_acc, test_acc); |
| | | //lr *= .5; |
| | | if(count%5 == 0){ |
| | | float train_acc = network_accuracy(net, train); |
| | | fprintf(stderr, "\nTRAIN: %f\n", train_acc); |
| | | float test_acc = network_accuracy(net, test); |
| | | fprintf(stderr, "TEST: %f\n\n", test_acc); |
| | | printf("%d, %f, %f\n", count, train_acc, test_acc); |
| | | //lr *= .5; |
| | | } |
| | | */ |
| | | */ |
| | | } |
| | | } |
| | | |
| | |
| | | cvReleaseImage(&src); |
| | | } |
| | | |
| | | void visualize_imagenet_features(char *filename) |
| | | { |
| | | int i,j,k; |
| | | network net = parse_network_cfg("cfg/voc_imagenet.cfg"); |
| | | list *plist = get_paths(filename); |
| | | node *n = plist->front; |
| | | int h = voc_size(1), w = voc_size(1); |
| | | int num = get_network_image(net).c; |
| | | image *vizs = calloc(num, sizeof(image)); |
| | | for(i = 0; i < num; ++i) vizs[i] = make_image(h, w, 3); |
| | | while(n){ |
| | | char *image_path = (char *)n->val; |
| | | image im = load_image(image_path, 0, 0); |
| | | printf("Processing %dx%d image\n", im.h, im.w); |
| | | resize_network(net, im.h, im.w, im.c); |
| | | forward_network(net, im.data); |
| | | image out = get_network_image(net); |
| | | |
| | | int dh = (im.h - h)/h; |
| | | int dw = (im.w - w)/w; |
| | | for(i = 0; i < out.h; ++i){ |
| | | for(j = 0; j < out.w; ++j){ |
| | | image sub = get_sub_image(im, dh*i, dw*j, h, w); |
| | | for(k = 0; k < out.c; ++k){ |
| | | float val = get_pixel(out, i, j, k); |
| | | //printf("%f, ", val); |
| | | image sub_c = copy_image(sub); |
| | | scale_image(sub_c, val); |
| | | add_into_image(sub_c, vizs[k], 0, 0); |
| | | free_image(sub_c); |
| | | } |
| | | free_image(sub); |
| | | } |
| | | } |
| | | //printf("\n"); |
| | | show_images(vizs, 10, "IMAGENET Visualization"); |
| | | cvWaitKey(1000); |
| | | n = n->next; |
| | | } |
| | | cvWaitKey(0); |
| | | } |
| | | |
| | | void features_VOC_image(char *image_file, char *image_dir, char *out_dir) |
| | | { |
| | | int i,j; |
| | |
| | | //feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW); |
| | | |
| | | //test_blas(); |
| | | //test_visualize(); |
| | | //test_gpu_blas(); |
| | | //test_blas(); |
| | | //test_convolve_matrix(); |
| | | // test_im2row(); |
| | | //test_split(); |
| | |
| | | //test_full(); |
| | | //train_VOC(); |
| | | //features_VOC_image(argv[1], argv[2], argv[3]); |
| | | features_VOC_image_size(argv[1], atoi(argv[2]), atoi(argv[3])); |
| | | //features_VOC_image_size(argv[1], atoi(argv[2]), atoi(argv[3])); |
| | | //visualize_imagenet_features("data/assira/train.list"); |
| | | visualize_imagenet_features("data/VOC2011.list"); |
| | | fprintf(stderr, "Success!\n"); |
| | | //test_random_preprocess(); |
| | | //test_random_classify(); |