| | |
| | | GPU=1 |
| | | DEBUG=0 |
| | | ARCH= -arch=sm_35 |
| | | |
| | | VPATH=./src/ |
| | | EXEC=cnn |
| | |
| | | CC=gcc |
| | | NVCC=nvcc |
| | | OPTS=-O3 |
| | | LINKER=$(CC) |
| | | LDFLAGS=`pkg-config --libs opencv` -lm -pthread |
| | | COMMON=`pkg-config --cflags opencv` -I/usr/local/cuda/include/ |
| | | CFLAGS=-Wall -Wfatal-errors |
| | |
| | | endif |
| | | |
| | | ifeq ($(GPU), 1) |
| | | LINKER=$(NVCC) |
| | | COMMON+=-DGPU |
| | | CFLAGS+=-DGPU |
| | | LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas |
| | |
| | | $(CC) $(COMMON) $(CFLAGS) -c $< -o $@ |
| | | |
| | | $(OBJDIR)%.o: %.cu |
| | | $(NVCC) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ |
| | | $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@ |
| | | |
| | | .PHONY: clean |
| | | |
| | |
| | | //network net = parse_network_cfg("/home/pjreddie/imagenet_backup/alexnet_1270.cfg"); |
| | | srand(time(0)); |
| | | network net = parse_network_cfg(cfgfile); |
| | | set_learning_network(&net, net.learning_rate, net.momentum, net.decay); |
| | | //test_learn_bias(*(convolutional_layer *)net.layers[1]); |
| | | //set_learning_network(&net, net.learning_rate, 0, net.decay); |
| | | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); |
| | | int imgs = 3072; |
| | | int i = net.seen/imgs; |
| | |
| | | cvWaitKey(0); |
| | | } |
| | | |
| | | void test_cifar10() |
| | | void test_cifar10(char *cfgfile) |
| | | { |
| | | network net = parse_network_cfg("cfg/cifar10_part5.cfg"); |
| | | network net = parse_network_cfg(cfgfile); |
| | | data test = load_cifar10_data("data/cifar10/test_batch.bin"); |
| | | clock_t start = clock(), end; |
| | | float test_acc = network_accuracy(net, test); |
| | | float test_acc = network_accuracy_multi(net, test, 10); |
| | | end = clock(); |
| | | printf("%f in %f Sec\n", test_acc, (float)(end-start)/CLOCKS_PER_SEC); |
| | | visualize_network(net); |
| | | cvWaitKey(0); |
| | | printf("%f in %f Sec\n", test_acc, sec(end-start)); |
| | | //visualize_network(net); |
| | | //cvWaitKey(0); |
| | | } |
| | | |
| | | void train_cifar10() |
| | | void train_cifar10(char *cfgfile) |
| | | { |
| | | srand(555555); |
| | | network net = parse_network_cfg("cfg/cifar10.cfg"); |
| | | srand(time(0)); |
| | | network net = parse_network_cfg(cfgfile); |
| | | data test = load_cifar10_data("data/cifar10/test_batch.bin"); |
| | | int count = 0; |
| | | int iters = 10000/net.batch; |
| | | int iters = 50000/net.batch; |
| | | data train = load_all_cifar10(); |
| | | while(++count <= 10000){ |
| | | clock_t time = clock(); |
| | |
| | | if(count%10 == 0){ |
| | | float test_acc = network_accuracy(net, test); |
| | | printf("%d: Loss: %f, Test Acc: %f, Time: %lf seconds\n", count, loss, test_acc,sec(clock()-time)); |
| | | //char buff[256]; |
| | | //sprintf(buff, "unikitty/cifar10_%d.cfg", count); |
| | | //save_network(net, buff); |
| | | char buff[256]; |
| | | sprintf(buff, "/home/pjreddie/imagenet_backup/cifar10_%d.cfg", count); |
| | | save_network(net, buff); |
| | | }else{ |
| | | printf("%d: Loss: %f, Time: %lf seconds\n", count, loss, sec(clock()-time)); |
| | | } |
| | |
| | | } |
| | | #endif |
| | | |
| | | if(0==strcmp(argv[1], "cifar")) train_cifar10(); |
| | | else if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet(); |
| | | if(0==strcmp(argv[1], "test_correct")) test_correct_alexnet(); |
| | | else if(0==strcmp(argv[1], "test_correct_nist")) test_correct_nist(); |
| | | else if(0==strcmp(argv[1], "test")) test_imagenet(); |
| | | //else if(0==strcmp(argv[1], "server")) run_server(); |
| | |
| | | return 0; |
| | | } |
| | | else if(0==strcmp(argv[1], "detection")) train_detection_net(argv[2]); |
| | | else if(0==strcmp(argv[1], "ctrain")) train_cifar10(argv[2]); |
| | | else if(0==strcmp(argv[1], "nist")) train_nist(argv[2]); |
| | | else if(0==strcmp(argv[1], "ctest")) test_cifar10(argv[2]); |
| | | else if(0==strcmp(argv[1], "train")) train_imagenet(argv[2]); |
| | | //else if(0==strcmp(argv[1], "client")) train_imagenet_distributed(argv[2]); |
| | | else if(0==strcmp(argv[1], "detect")) test_detection(argv[2]); |
| | |
| | | axpy_cpu(layer->outputs, 1, layer->bias_updates, 1, layer->bias_prev, 1); |
| | | scal_cpu(layer->outputs, 0, layer->bias_updates, 1); |
| | | |
| | | //printf("rate: %f\n", layer->learning_rate); |
| | | |
| | | axpy_cpu(layer->outputs, layer->learning_rate, layer->bias_prev, 1, layer->biases, 1); |
| | | |
| | | axpy_cpu(layer->inputs*layer->outputs, -layer->decay, layer->weights, 1, layer->weight_prev, 1); |
| | |
| | | { |
| | | __shared__ float part[BLOCK]; |
| | | int i,b; |
| | | int filter = (blockIdx.x + blockIdx.y*gridDim.x); |
| | | int filter = blockIdx.x; |
| | | int p = threadIdx.x; |
| | | float sum = 0; |
| | | for(b = 0; b < batch; ++b){ |
| | |
| | | { |
| | | int size = convolutional_out_height(layer)*convolutional_out_width(layer); |
| | | |
| | | |
| | | learn_bias<<<cuda_gridsize(layer.n), BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu); |
| | | learn_bias<<<layer.n, BLOCK>>>(layer.batch, layer.n, size, layer.delta_gpu, layer.bias_updates_gpu); |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | | |
| | |
| | | gemm_ongpu(0,0,m,n,k,1.,a,k,b,n,1.,c+i*m*n,n); |
| | | } |
| | | activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation); |
| | | cuda_pull_array(layer.output_gpu, layer.output, m*n*layer.batch); |
| | | //for(i = 0; i < m*n*layer.batch; ++i) printf("%f, ", layer.output[i]); |
| | | //printf("\n"); |
| | | } |
| | | |
| | | extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, float *in, float *delta_gpu) |
| | |
| | | extern "C" void update_convolutional_layer_gpu(convolutional_layer layer) |
| | | { |
| | | int size = layer.size*layer.size*layer.c*layer.n; |
| | | |
| | | /* |
| | | cuda_pull_array(layer.bias_updates_gpu, layer.bias_updates, layer.n); |
| | | cuda_pull_array(layer.biases_gpu, layer.biases, layer.n); |
| | | cuda_pull_array(layer.filter_updates_gpu, layer.filter_updates, size); |
| | | cuda_pull_array(layer.filters_gpu, layer.filters, size); |
| | | printf("Bias: %f updates: %f\n", mse_array(layer.biases, layer.n), mse_array(layer.bias_updates, layer.n)); |
| | | printf("Filter: %f updates: %f\n", mse_array(layer.filters, layer.n), mse_array(layer.filter_updates, layer.n)); |
| | | */ |
| | | |
| | | axpy_ongpu(layer.n, layer.learning_rate, layer.bias_updates_gpu, 1, layer.biases_gpu, 1); |
| | | scal_ongpu(layer.n,layer.momentum, layer.bias_updates_gpu, 1); |
| | | |
| | |
| | | { |
| | | struct load_args a = *(struct load_args*)ptr; |
| | | *a.d = load_data(a.paths, a.n, a.m, a.labels, a.k, a.h, a.w); |
| | | normalize_data_rows(*a.d); |
| | | translate_data_rows(*a.d, -144); |
| | | scale_data_rows(*a.d, 1./128); |
| | | free(ptr); |
| | | return 0; |
| | | } |
| | |
| | | return "none"; |
| | | } |
| | | |
| | | |
| | | |
| | | network make_network(int n, int batch) |
| | | { |
| | | network net; |
| | |
| | | return net; |
| | | } |
| | | |
| | | |
| | | void forward_network(network net, float *input, float *truth, int train) |
| | | { |
| | | int i; |
| | |
| | | |
| | | float train_network_datum_gpu(network net, float *x, float *y) |
| | | { |
| | | //clock_t time = clock(); |
| | | int x_size = get_network_input_size(net)*net.batch; |
| | | int y_size = get_network_output_size(net)*net.batch; |
| | | if(!*net.input_gpu){ |
| | |
| | | cuda_push_array(*net.input_gpu, x, x_size); |
| | | cuda_push_array(*net.truth_gpu, y, y_size); |
| | | } |
| | | //printf("trans %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | forward_network_gpu(net, *net.input_gpu, *net.truth_gpu, 1); |
| | | //printf("forw %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | backward_network_gpu(net, *net.input_gpu); |
| | | //printf("back %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | update_network_gpu(net); |
| | | float error = get_network_cost(net); |
| | | //printf("updt %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | return error; |
| | | } |
| | | |
| | |
| | | return a; |
| | | } |
| | | |
| | | float mse_array(float *a, int n) |
| | | { |
| | | int i; |
| | | float sum = 0; |
| | | for(i = 0; i < n; ++i) sum += a[i]*a[i]; |
| | | return sqrt(sum/n); |
| | | } |
| | | |
| | | void normalize_array(float *a, int n) |
| | | { |
| | | int i; |
| | |
| | | void translate_array(float *a, int n, float s); |
| | | int max_index(float *a, int n); |
| | | float constrain(float a, float max); |
| | | float mse_array(float *a, int n); |
| | | float rand_normal(); |
| | | float rand_uniform(); |
| | | float sum_array(float *a, int n); |