*.o
*.dSYM
*.csv
*.out
mnist/
images/
opencv/
convnet/

width=28
height=28
channels=1
-filters=4
+filters=6
size=5
stride=1
activation=ramp

[maxpool]
stride=2

[conv]
-filters=12
+filters=16
size=5
stride=1
activation=ramp

[maxpool]
stride=2

[conv]
-filters=10
+filters=120
size=3
stride=1
activation=ramp

[maxpool]
stride=2

[conn]
output = 80
activation=ramp

[conn]
output = 10
activation=ramp

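A quick sanity check on the sizes in this cfg. The sketch below is standalone and not repo code; it assumes unpadded convolution (out = (in - size)/stride + 1) and stride-2 pooling that halves each dimension. Walking the 28x28 input through the layers yields 1 x 1 x 120 = 120 inputs to the first [conn], which matches filters=120 in the last conv layer.

    /* Standalone sketch: trace the spatial size of nist.cfg through each
     * layer, assuming unpadded convolution and stride-2 pooling. */
    #include <stdio.h>

    static int conv_out(int in, int size, int stride)
    {
        return (in - size)/stride + 1;
    }

    int main()
    {
        int s = 28;
        s = conv_out(s, 5, 1); /* [conv] 5x5 -> 24 */
        s = s/2;               /* [maxpool]  -> 12 */
        s = conv_out(s, 5, 1); /* [conv] 5x5 ->  8 */
        s = s/2;               /* [maxpool]  ->  4 */
        s = conv_out(s, 3, 1); /* [conv] 3x3 ->  2 */
        s = s/2;               /* [maxpool]  ->  1 */
        printf("%d x %d x 120 = %d inputs to the first [conn]\n", s, s, s*s*120);
        return 0;
    }
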
...
ACTIVATION get_activation(char *s)
{
    if (strcmp(s, "sigmoid")==0) return SIGMOID;
    if (strcmp(s, "relu")==0) return RELU;
-   if (strcmp(s, "identity")==0) return IDENTITY;
+   if (strcmp(s, "linear")==0) return LINEAR;
    if (strcmp(s, "ramp")==0) return RAMP;
+   if (strcmp(s, "tanh")==0) return TANH;
    fprintf(stderr, "Couldn't find activation function %s, going with ReLU\n", s);
    return RELU;
}

double activate(double x, ACTIVATION a){
    switch(a){
-       case IDENTITY:
+       case LINEAR:
            return x;
        case SIGMOID:
            return 1./(1.+exp(-x));
        case RELU:
            return x*(x>0);
        case RAMP:
            return x*(x>0) + .1*x;
+       case TANH:
+           return (exp(2*x)-1)/(exp(2*x)+1);
    }
    return 0;
}
double gradient(double x, ACTIVATION a){
    switch(a){
-       case IDENTITY:
+       case LINEAR:
            return 1;
        case SIGMOID:
            return (1.-x)*x;
        case RELU:
            return (x>0);
        case RAMP:
            return (x>0) + .1;
+       case TANH:
+           return 1-x*x;
    }
    return 0;
}
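Note that gradient() is written in terms of the layer's output rather than its pre-activation input: for SIGMOID it returns (1-y)*y where y = activate(x), and for TANH it returns 1 - y*y. A minimal finite-difference check of that convention (standalone, not repo code):

    #include <stdio.h>
    #include <math.h>

    int main()
    {
        double x = .3, eps = 1e-6;
        double y  = 1./(1.+exp(-x));           /* activate(x, SIGMOID) */
        double yp = 1./(1.+exp(-(x+eps)));
        printf("numeric:  %f\n", (yp-y)/eps);  /* d(sigmoid)/dx at x   */
        printf("analytic: %f\n", (1.-y)*y);    /* gradient(y, SIGMOID) */
        return 0;
    }
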

double identity_activation(double x)
{
    return x;
}
double identity_gradient(double x)
{
    return 1;
}

double relu_activation(double x)
{
    return x*(x>0);
}
double relu_gradient(double x)
{
    return (x>0);
}

double sigmoid_activation(double x)
{
    return 1./(1.+exp(-x));
}

double sigmoid_gradient(double x)
{
    return x*(1.-x);
}

#ifndef ACTIVATIONS_H
#define ACTIVATIONS_H

typedef enum{
-   SIGMOID, RELU, IDENTITY, RAMP
+   SIGMOID, RELU, LINEAR, RAMP, TANH
}ACTIVATION;

ACTIVATION get_activation(char *s);

connected_layer *make_connected_layer(int inputs, int outputs, ACTIVATION activation)
{
-   printf("Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
+   fprintf(stderr, "Connected Layer: %d inputs, %d outputs\n", inputs, outputs);
    int i;
    connected_layer *layer = calloc(1, sizeof(connected_layer));
    layer->inputs = inputs;
    ...
    layer->biases = calloc(outputs, sizeof(double));
    for(i = 0; i < outputs; ++i)
        //layer->biases[i] = rand_normal()*scale + scale;
-       layer->biases[i] = 1;
+       layer->biases[i] = 0;

    layer->activation = activation;
    return layer;
}

convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
{
    ...
    layer->w = w;
    layer->c = c;
    layer->n = n;
-   layer->edge = 0;
+   layer->edge = 1;
    layer->stride = stride;
    layer->kernels = calloc(n, sizeof(image));
    layer->kernel_updates = calloc(n, sizeof(image));
    layer->kernel_momentum = calloc(n, sizeof(image));
    layer->biases = calloc(n, sizeof(double));
    layer->bias_updates = calloc(n, sizeof(double));
    layer->bias_momentum = calloc(n, sizeof(double));
-   double scale = 20./(size*size*c);
+   double scale = 2./(size*size);
    for(i = 0; i < n; ++i){
        //layer->biases[i] = rand_normal()*scale + scale;
-       layer->biases[i] = 1;
+       layer->biases[i] = 0;
        layer->kernels[i] = make_random_kernel(size, c, scale);
        layer->kernel_updates[i] = make_random_kernel(size, c, 0);
        layer->kernel_momentum[i] = make_random_kernel(size, c, 0);
    }
    ...
        out_h = (layer->h - layer->size)/layer->stride+1;
        out_w = (layer->w - layer->size)/layer->stride+1;
    }
-   printf("Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
+   fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
    layer->output = calloc(out_h * out_w * n, sizeof(double));
    layer->delta = calloc(out_h * out_w * n, sizeof(double));
    layer->upsampled = make_image(h,w,n);
    ...
    }
}

-void learn_convolutional_layer(convolutional_layer layer, double *input)
+void gradient_delta_convolutional_layer(convolutional_layer layer)
{
    int i;
-   image in_image = double_to_image(layer.h, layer.w, layer.c, input);
    image out_delta = get_convolutional_delta(layer);
    image out_image = get_convolutional_image(layer);
    for(i = 0; i < out_image.h*out_image.w*out_image.c; ++i){
        out_delta.data[i] *= gradient(out_image.data[i], layer.activation);
    }
}

+void learn_convolutional_layer(convolutional_layer layer, double *input)
+{
+   int i;
+   image in_image = double_to_image(layer.h, layer.w, layer.c, input);
+   image out_delta = get_convolutional_delta(layer);
+   gradient_delta_convolutional_layer(layer);
+   for(i = 0; i < layer.n; ++i){
+       kernel_update(in_image, layer.kernel_updates[i], layer.stride, i, out_delta, layer.edge);
+       layer.bias_updates[i] += avg_image_layer(out_delta, i);
    ...

maxpool_layer *make_maxpool_layer(int h, int w, int c, int stride)
{
-   printf("Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride);
+   fprintf(stderr, "Maxpool Layer: %d x %d x %d image, %d stride\n", h,w,c,stride);
    maxpool_layer *layer = calloc(1, sizeof(maxpool_layer));
    layer->h = h;
    layer->w = w;
    ...
}

...
    double mean = mean_array(output, n);
    double vari = variance_array(output, n);
-   printf("Layer %d - Mean: %f, Variance: %f\n",i,mean, vari);
+   fprintf(stderr, "Layer %d - Mean: %f, Variance: %f\n",i,mean, vari);
    if(n > 100) n = 100;
-   for(j = 0; j < n; ++j) printf("%f, ", output[j]);
-   if(n == 100)printf(".....\n");
-   printf("\n");
+   for(j = 0; j < n; ++j) fprintf(stderr, "%f, ", output[j]);
+   if(n == 100)fprintf(stderr,".....\n");
+   fprintf(stderr, "\n");
    }
}

softmax_layer *make_softmax_layer(int inputs)
{
-   printf("Softmax Layer: %d inputs\n", inputs);
+   fprintf(stderr, "Softmax Layer: %d inputs\n", inputs);
    softmax_layer *layer = calloc(1, sizeof(softmax_layer));
    layer->inputs = inputs;
    layer->output = calloc(inputs, sizeof(double));
    ...
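make_softmax_layer only allocates storage; the forward pass is not shown in this hunk. For reference, a standalone sketch of what a softmax layer conventionally computes (not the repo's code): output[i] = exp(input[i]) / sum over j of exp(input[j]).

    #include <stdio.h>
    #include <math.h>

    int main()
    {
        double in[] = {1., 2., .5};
        double sum = 0;
        int i;
        for(i = 0; i < 3; ++i) sum += exp(in[i]);
        for(i = 0; i < 3; ++i) printf("%f ", exp(in[i])/sum);
        printf("\n"); /* three positive values summing to 1 */
        return 0;
    }
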
void test_convolve()
{
    image dog = load_image("dog.jpg");
    //show_image_layers(dog, "Dog");
    printf("dog channels %d\n", dog.c);
    image kernel = make_random_image(3,3,dog.c);
    image edge = make_image(dog.h, dog.w, 1);
    ...
    image out_delta = get_convolutional_delta(layer);
    for(i = 0; i < out.h*out.w*out.c; ++i){
        out_delta.data[i] = 1;
-       backward_convolutional_layer2(layer, test.data, in_delta.data);
+       backward_convolutional_layer(layer, test.data, in_delta.data);
        image partial = copy_image(in_delta);
        jacobian2[i] = partial.data;
        out_delta.data[i] = 0;
    ...
    int count = 0;

    double avgerr = 0;
-   while(1){
+   while(++count < 100000000){
        double v = ((double)rand()/RAND_MAX);
        double truth = v*v;
        input[0] = v;
        ...
        double *delta = get_network_delta(net);
        double err = pow((out[0]-truth),2.);
        avgerr = .99 * avgerr + .01 * err;
        //if(++count % 100000 == 0) printf("%f\n", avgerr);
-       if(++count % 1000000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
+       if(count % 1000000 == 0) printf("%f %f :%f AVG %f \n", truth, out[0], err, avgerr);
        delta[0] = truth - out[0];
        learn_network(net, input);
        update_network(net, .001);
        ...
    }
}

-double error_network(network net, matrix m, double *truth)
+double error_network(network net, matrix m, double **truth)
{
    int i;
    int correct = 0;
    int k = get_network_output_size(net);
    for(i = 0; i < m.rows; ++i){
        forward_network(net, m.vals[i]);
        double *out = get_network_output(net);
-       double err = truth[i] - out[0];
-       if(fabs(err) < .5) ++correct;
+       int guess = max_index(out, k);
+       if(truth[i][guess]) ++correct;
    }
    return (double)correct/m.rows;
}
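The new error_network scores classification accuracy: take the argmax of the output vector and count it correct when the one-hot truth row is 1 at that index. A minimal standalone illustration of the same rule:

    #include <stdio.h>

    int main()
    {
        double out[]   = {.05, .10, .60, .25}; /* hypothetical network output */
        double truth[] = {0, 0, 1, 0};         /* one-hot label, class 2      */
        int k = 4, i, guess = 0;
        for(i = 1; i < k; ++i)                 /* same logic as max_index()   */
            if(out[i] > out[guess]) guess = i;
        printf("guess %d, correct %d\n", guess, truth[guess] != 0);
        return 0;
    }
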

void test_nist()
{
    srand(999999);
    network net = parse_network_cfg("nist.cfg");
-   matrix m = csv_to_matrix("images/nist_train.csv");
-   matrix ho = hold_out_matrix(&m, 3000);
+   matrix m = csv_to_matrix("mnist/mnist_train.csv");
+   matrix test = csv_to_matrix("mnist/mnist_test.csv");
    double *truth_1d = pop_column(&m, 0);
    double **truth = one_hot(truth_1d, m.rows, 10);
-   double *ho_truth_1d = pop_column(&ho, 0);
-   double **ho_truth = one_hot(ho_truth_1d, ho.rows, 10);
+   double *test_truth_1d = pop_column(&test, 0);
+   double **test_truth = one_hot(test_truth_1d, test.rows, 10);
    int i,j;
    clock_t start = clock(), end;
+   for(i = 0; i < test.rows; ++i){
+       normalize_array(test.vals[i], 28*28);
+       //scale_array(m.vals[i], 28*28, 1./255.);
+       //translate_array(m.vals[i], 28*28, -.1);
+   }
    for(i = 0; i < m.rows; ++i){
        normalize_array(m.vals[i], 28*28);
        //scale_array(m.vals[i], 28*28, 1./255.);
        //translate_array(m.vals[i], 28*28, -.1);
    }
    int count = 0;
-   double lr = .0001;
-   while(++count <= 3000000){
+   double lr = .0005;
+   while(++count <= 300){
        //lr *= .99;
        int index = 0;
        int correct = 0;
-       for(i = 0; i < 1000; ++i){
+       int number = 1000;
+       for(i = 0; i < number; ++i){
            index = rand()%m.rows;
-           normalize_array(m.vals[index], 28*28);
            forward_network(net, m.vals[index]);
            double *out = get_network_output(net);
            double *delta = get_network_delta(net);
            ...
        }
        print_network(net);
        image input = double_to_image(28,28,1, m.vals[index]);
-       show_image(input, "Input");
+       //show_image(input, "Input");
        image o = get_network_image(net);
-       show_image_collapsed(o, "Output");
+       //show_image_collapsed(o, "Output");
        visualize_network(net);
-       cvWaitKey(100);
+       cvWaitKey(10);
        //double test_acc = error_network(net, m, truth);
        //double valid_acc = error_network(net, ho, ho_truth);
        //printf("%f, %f\n", test_acc, valid_acc);
-       fprintf(stderr, "%5d: %f %f\n",count, (double)correct/1000, lr);
        //if(valid_acc > .70) break;
+       fprintf(stderr, "\n%5d: %f %f\n\n",count, (double)correct/number, lr);
+       if(count % 10 == 0 && 0){
+           double train_acc = error_network(net, m, truth);
+           fprintf(stderr, "\nTRAIN: %f\n", train_acc);
+           double test_acc = error_network(net, test, test_truth);
+           fprintf(stderr, "TEST: %f\n\n", test_acc);
+           printf("%d, %f, %f\n", count, train_acc, test_acc);
+       }
+       if(count % (m.rows/number) == 0) lr /= 2;
    }
+   double train_acc = error_network(net, m, truth);
+   fprintf(stderr, "\nTRAIN: %f\n", train_acc);
+   double test_acc = error_network(net, test, test_truth);
+   fprintf(stderr, "TEST: %f\n\n", test_acc);
+   printf("%d, %f, %f\n", count, train_acc, test_acc);
    end = clock();
-   printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
+   //printf("Neural Net Learning: %lf seconds\n", (double)(end-start)/CLOCKS_PER_SEC);
}
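The lr /= 2 line implements a step schedule: with number = 1000 samples per iteration, m.rows/number iterations approximate one pass over the training set, so the rate halves roughly once per epoch. A standalone trace (assuming the usual 60000 MNIST training rows; the repo's actual row count comes from the CSV):

    #include <stdio.h>

    int main()
    {
        double lr = .0005;
        int rows = 60000, number = 1000, count;
        for(count = 1; count <= 300; ++count){
            if(count % (rows/number) == 0) lr /= 2; /* halve every 60 iters */
        }
        printf("lr after 300 iterations: %g\n", lr); /* halved 5 times */
        return 0;
    }
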

void test_kernel_update()
{
    ...
    double delta[] = {.1};
    double input[] = {.3, .5, .3, .5, .5, .5, .5, .0, .5};
    double kernel[] = {1,2,3,4,5,6,7,8,9};
-   convolutional_layer layer = *make_convolutional_layer(3, 3, 1, 1, 3, 1, IDENTITY);
+   convolutional_layer layer = *make_convolutional_layer(3, 3, 1, 1, 3, 1, LINEAR);
    layer.kernels[0].data = kernel;
    layer.delta = delta;
    learn_convolutional_layer(layer, input);
    print_image(layer.kernels[0]);
    print_image(get_convolutional_delta(layer));
    print_image(layer.kernel_updates[0]);

}
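With a LINEAR activation the gradient is 1, so in this 3x3-on-3x3 test each kernel-update entry should simply be the input pixel times the output delta. Expected numbers computed standalone, assuming no edge padding so the layer has exactly one output position:

    #include <stdio.h>

    int main()
    {
        double input[] = {.3, .5, .3, .5, .5, .5, .5, .0, .5};
        double delta = .1;
        int i;
        for(i = 0; i < 9; ++i) printf("%.2f ", input[i]*delta);
        printf("\n"); /* 0.03 0.05 0.03 0.05 0.05 0.05 0.05 0.00 0.05 */
        return 0;
    }
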

void test_random_classify()
{
    ...
            double *delta = get_network_delta(net);
            //printf("%f\n", out[0]);
            delta[0] = truth[index] - out[0];
            // printf("%f\n", delta[0]);
            //printf("%f %f\n", truth[index], out[0]);
            learn_network(net, m.vals[index]);
            update_network(net, .00001);
        }
-       double test_acc = error_network(net, m, truth);
-       double valid_acc = error_network(net, ho, ho_truth);
-       printf("%f, %f\n", test_acc, valid_acc);
-       fprintf(stderr, "%5d: %f Valid: %f\n",count, test_acc, valid_acc);
+       //double test_acc = error_network(net, m, truth);
+       //double valid_acc = error_network(net, ho, ho_truth);
+       //printf("%f, %f\n", test_acc, valid_acc);
+       //fprintf(stderr, "%5d: %f Valid: %f\n",count, test_acc, valid_acc);
        //if(valid_acc > .70) break;
    }
    end = clock();
    ...
int main()
{
    //test_kernel_update();
-   //test_nist();
-   test_full();
+   test_nist();
+   //test_full();
    //test_random_preprocess();
    //test_random_classify();
    //test_parser();
    ...
    sigma = sqrt(variance_array(a,n));
}

void translate_array(double *a, int n, double s)
{
    int i;
    for(i = 0; i < n; ++i){
        a[i] += s;
    }
}

void scale_array(double *a, int n, double s)
{
    int i;
    for(i = 0; i < n; ++i){
        a[i] *= s;
    }
}

int max_index(double *a, int n)
{
    if(n <= 0) return -1;
    int i, max_i = 0;
    double max = a[0];
    for(i = 1; i < n; ++i){
        if(a[i] > max){
            max = a[i];
            max_i = i;
        }
    }
    return max_i;
}

double rand_normal()
{
    int i;
    ...

int count_fields(char *line);
double *parse_fields(char *line, int n);
void normalize_array(double *a, int n);
void scale_array(double *a, int n, double s);
void translate_array(double *a, int n, double s);
int max_index(double *a, int n);
double constrain(double a, double max);
double rand_normal();
double mean_array(double *a, int n);