~speedprog/mtg/mtg_card_detector.git

parent: 118bdd6f | patch | commit | show whitespace

Extracting features from VOC with temp filters

Joseph Redmon

2014-02-15 228d3663f871d0e4bdee468572eb80141cb4fe3f

Extracting features from VOC with temp filters

9 files modified

	src/convolutional_layer.c	33 ●●●●● patch \| view \| raw \| blame \| history
	src/convolutional_layer.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/data.c	1 ●●●●● patch \| view \| raw \| blame \| history
	src/data.h	2 ●●●●● patch \| view \| raw \| blame \| history
	src/image.c	31 ●●●●● patch \| view \| raw \| blame \| history
	src/image.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/network.c	28 ●●●●● patch \| view \| raw \| blame \| history
	src/network.h	1 ●●●●● patch \| view \| raw \| blame \| history
	src/tests.c	62 ●●●●● patch \| view \| raw \| blame \| history

 src/convolutional_layer.c

@@ -3,11 +3,21 @@
#include "mini_blas.h"
#include <stdio.h>

/*
 * Number of output rows for a convolutional layer.
 *
 * Computed from the layer's current input height, filter size and stride
 * as (h - size) / stride + 1 using integer division; the formula has no
 * padding term.
 */
int convolutional_out_height(convolutional_layer layer)
{
    int span = layer.h - layer.size;
    int steps = span / layer.stride;
    return steps + 1;
}

/*
 * Number of output columns for a convolutional layer.
 *
 * Computed from the layer's current input width, filter size and stride
 * as (w - size) / stride + 1 using integer division; the formula has no
 * padding term.
 */
int convolutional_out_width(convolutional_layer layer)
{
    int span = layer.w - layer.size;
    int steps = span / layer.stride;
    return steps + 1;
}

/*
 * Describe the layer's output buffer as an image.
 *
 * The height and width are derived from the layer's current input size via
 * convolutional_out_height()/convolutional_out_width(); the channel count is
 * the number of filters (layer.n). The layer.output pointer is handed to
 * float_to_image() together with those dimensions.
 */
image get_convolutional_image(convolutional_layer layer)
{
    int h = convolutional_out_height(layer);
    int w = convolutional_out_width(layer);
    int c = layer.n;
    return float_to_image(h,w,c,layer.output);
}
@@ -15,8 +25,8 @@
/*
 * Describe the layer's delta buffer as an image.
 *
 * The height and width are derived from the layer's current input size via
 * convolutional_out_height()/convolutional_out_width(); the channel count is
 * the number of filters (layer.n). The layer.delta pointer is handed to
 * float_to_image() together with those dimensions.
 */
image get_convolutional_delta(convolutional_layer layer)
{
    int h = convolutional_out_height(layer);
    int w = convolutional_out_width(layer);
    int c = layer.n;
    return float_to_image(h,w,c,layer.delta);
}
@@ -24,7 +34,6 @@
convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
{
    int i;
    int out_h,out_w;
    size = 2*(size/2)+1; //HA! And you thought you'd use an even sized filter...
    convolutional_layer *layer = calloc(1, sizeof(convolutional_layer));
    layer->h = h;
@@ -47,15 +56,13 @@
        //layer->biases[i] = rand_normal()*scale + scale;
        layer->biases[i] = 0;
    }
    out_h = (h-size)/stride + 1;
    out_w = (w-size)/stride + 1;
    int out_h = (h-size)/stride + 1;
    int out_w = (w-size)/stride + 1;

    layer->col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
    layer->output = calloc(out_h * out_w * n, sizeof(float));
    layer->delta  = calloc(out_h * out_w * n, sizeof(float));
    layer->activation = activation;
    layer->out_h = out_h;
    layer->out_w = out_w;

    fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
    srand(0);
@@ -90,7 +97,10 @@
/*
 * Scale every element of layer.delta by the activation gradient evaluated
 * at the matching element of layer.output.
 *
 * The element count is out_height * out_width * n, computed from the layer's
 * current input size so that it follows any change made to layer.h/layer.w.
 */
void gradient_delta_convolutional_layer(convolutional_layer layer)
{
    int i;
    int size = convolutional_out_height(layer)
                *convolutional_out_width(layer)
                *layer.n;
    for(i = 0; i < size; ++i){
        layer.delta[i] *= gradient(layer.output[i], layer.activation);
    }
}
@@ -98,7 +108,8 @@
void learn_bias_convolutional_layer(convolutional_layer layer)
{
    int i,j;
    int size = layer.out_h*layer.out_w;
    int size = convolutional_out_height(layer)
                *convolutional_out_width(layer);
    for(i = 0; i < layer.n; ++i){
        float sum = 0;
        for(j = 0; j < size; ++j){

 src/convolutional_layer.h

@@ -6,7 +6,6 @@

typedef struct {
    int h,w,c;
    int out_h, out_w, out_c;
    int n;
    int size;
    int stride;

 src/data.c

@@ -1,5 +1,4 @@
#include "data.h"
#include "list.h"
#include "utils.h"
#include "image.h"


 src/data.h

@@ -2,6 +2,7 @@
#define DATA_H

#include "matrix.h"
#include "list.h"

typedef struct{
    matrix X;
@@ -16,6 +17,7 @@
                                    char **labels, int k, int h, int w);
data load_data_image_pathfile_random(char *filename, int n, char **labels, 
                                        int k, int h, int w);
list *get_paths(char *filename);
data load_categorical_data_csv(char *filename, int target, int k);
void normalize_data_rows(data d);
void scale_data_rows(data d, float s);

 src/image.c

@@ -342,21 +342,11 @@
    return outImg;
}

image load_image(char *filename, int h, int w)
image ipl_to_image(IplImage* src)
{
    IplImage* src = 0;
    if( (src = cvLoadImage(filename,-1)) == 0 )
    {
        printf("Cannot load file image %s\n", filename);
        exit(0);
    }
    cvShowImage("Orig", src);
    IplImage *resized = resizeImage(src, h, w, 1);
    cvShowImage("Sized", resized);
    cvWaitKey(0);
    cvReleaseImage(&src);
    src = resized;
    unsigned char *data = (unsigned char *)src->imageData;
    int h = src->height;
    int w = src->width;
    int c = src->nChannels;
    int step = src->widthStep;
    image out = make_image(h,w,c);
@@ -369,6 +359,21 @@
            }
        }
    }
    return out;
}

/*
 * Load an image file, resize it via resizeImage(..., h, w, 1) and convert
 * the result with ipl_to_image().
 *
 * If cvLoadImage() fails, a message is printed to stdout and the process
 * exits with status 0. Both the loaded and the resized IplImage are
 * released before returning.
 */
image load_image(char *filename, int h, int w)
{
    IplImage *loaded = cvLoadImage(filename,-1);
    if(loaded == 0){
        printf("Cannot load file image %s\n", filename);
        exit(0);
    }

    IplImage *scaled = resizeImage(loaded, h, w, 1);
    cvReleaseImage(&loaded);

    image result = ipl_to_image(scaled);
    cvReleaseImage(&scaled);
    return result;
}

 src/image.h

@@ -34,6 +34,7 @@
image float_to_image(int h, int w, int c, float *data);
image copy_image(image p);
image load_image(char *filename, int h, int w);
image ipl_to_image(IplImage* src);

float get_pixel(image m, int x, int y, int c);
float get_pixel_extend(image m, int x, int y, int c);

 src/network.c

@@ -331,6 +331,34 @@
    return 0;
}

/*
 * Propagate a new input size (h x w x c) through the network.
 *
 * Walks the layers in order. For each CONVOLUTIONAL or MAXPOOL layer the
 * input dimensions are overwritten, and the dimensions of that layer's
 * output image become the input dimensions for the next layer. Layers of
 * any other type are left untouched and do not change the running size.
 *
 * Only the h/w/c fields are written here; no buffers are resized.
 * NOTE(review): confirm the layers' output/delta buffers are large enough
 * for the new size before running the network.
 *
 * Always returns 0.
 */
int reset_network_size(network net, int h, int w, int c)
{
    int idx;
    for(idx = 0; idx < net.n; ++idx){
        image produced;
        if(net.types[idx] == CONVOLUTIONAL){
            convolutional_layer *conv = (convolutional_layer *)net.layers[idx];
            conv->h = h;
            conv->w = w;
            conv->c = c;
            produced = get_convolutional_image(*conv);
        }
        else if(net.types[idx] == MAXPOOL){
            maxpool_layer *pool = (maxpool_layer *)net.layers[idx];
            pool->h = h;
            pool->w = w;
            pool->c = c;
            produced = get_maxpool_image(*pool);
        }
        else{
            /* Other layer types neither consume nor alter the size. */
            continue;
        }
        h = produced.h;
        w = produced.w;
        c = produced.c;
    }
    return 0;
}

int get_network_output_size(network net)
{
    int i = net.n-1;

 src/network.h

@@ -41,6 +41,7 @@
void print_network(network net);
void visualize_network(network net);
void save_network(network net, char *filename);
int reset_network_size(network net, int h, int w, int c);

#endif


 src/tests.c

@@ -366,20 +366,21 @@

void train_VOC()
{
    network net = parse_network_cfg("cfg/voc_backup_ramp_80.cfg");
    network net = parse_network_cfg("cfg/voc_backup_sig_20.cfg");
    srand(2222222);
    int i = 0;
    int i = 20;
    char *labels[] = {"aeroplane","bicycle","bird","boat","bottle","bus","car","cat","chair","cow","diningtable","dog","horse","motorbike","person","pottedplant","sheep","sofa","train","tvmonitor"};
    float lr = .00001;
    float momentum = .9;
    float decay = 0.01;
    while(i++ < 1000 || 1){
        visualize_network(net);
        cvWaitKey(100);
        data train = load_data_image_pathfile_random("images/VOC2012/train_paths.txt", 1000, labels, 20, 300, 400);

        image im = float_to_image(300, 400, 3,train.X.vals[0]);
        show_image(im, "input");
        visualize_network(net);
        cvWaitKey(100);

        normalize_data_rows(train);
        clock_t start = clock(), end;
        float loss = train_network_sgd(net, train, 1000, lr, momentum, decay);
@@ -388,13 +389,61 @@
        free_data(train);
        if(i%10==0){
            char buff[256];
            sprintf(buff, "cfg/voc_backup_ramp_%d.cfg", i);
            sprintf(buff, "cfg/voc_backup_sig_%d.cfg", i);
            save_network(net, buff);
        }
        //lr *= .99;
    }
}

void features_VOC()
{
    int i,j;
    network net = parse_network_cfg("cfg/voc_features.cfg");
    char *path_file = "images/VOC2012/all_paths.txt";
    char *out_dir = "voc_features/";
    list *paths = get_paths(path_file);
    node *n = paths->front;
    while(n){
        char *path = (char *)n->val;
        char buff[1024];
        sprintf(buff, "%s%s.txt",out_dir, path);
        FILE *fp = fopen(buff, "w");
        if(fp == 0) file_error(buff);

        IplImage* src = 0;
        if( (src = cvLoadImage(path,-1)) == 0 )
        {
            printf("Cannot load file image %s\n", path);
            exit(0);
        }

        for(i = 0; i < 10; ++i){
            int w = 1024 - 90*i; //PICKED WITH CAREFUL CROSS-VALIDATION!!!!
            int h = (int)((double)w/src->width * src->height);
            IplImage *sized = cvCreateImage(cvSize(w,h), src->depth, src->nChannels);
            cvResize(src, sized, CV_INTER_LINEAR);
            image im = ipl_to_image(sized);
            reset_network_size(net, im.h, im.w, im.c);
            forward_network(net, im.data);
            free_image(im);
            image out = get_network_image_layer(net, 5);
            fprintf(fp, "%d, %d, %d\n",out.c, out.h, out.w);
            for(j = 0; j < out.c*out.h*out.w; ++j){
                if(j != 0)fprintf(fp, ",");
                fprintf(fp, "%g", out.data[j]);
            }
            fprintf(fp, "\n");
            out.c = 1;
            show_image(out, "output");
            cvWaitKey(10);
            cvReleaseImage(&sized);
        }
        fclose(fp);
        n = n->next;
    }
}

int main()
{
    //feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
@@ -406,7 +455,8 @@
    //test_ensemble();
    //test_nist();
    //test_full();
    train_VOC();
    //train_VOC();
    features_VOC();
    //test_random_preprocess();
    //test_random_classify();
    //test_parser();

			@@ -3,11 +3,21 @@
			#include "mini_blas.h"
			#include <stdio.h>

			int convolutional_out_height(convolutional_layer layer)
			{
			return (layer.h-layer.size)/layer.stride + 1;
			}

			int convolutional_out_width(convolutional_layer layer)
			{
			return (layer.w-layer.size)/layer.stride + 1;
			}

			image get_convolutional_image(convolutional_layer layer)
			{
			int h,w,c;
			h = layer.out_h;
			w = layer.out_w;
			h = convolutional_out_height(layer);
			w = convolutional_out_width(layer);
			c = layer.n;
			return float_to_image(h,w,c,layer.output);
			}
			@@ -15,8 +25,8 @@
			image get_convolutional_delta(convolutional_layer layer)
			{
			int h,w,c;
			h = layer.out_h;
			w = layer.out_w;
			h = convolutional_out_height(layer);
			w = convolutional_out_width(layer);
			c = layer.n;
			return float_to_image(h,w,c,layer.delta);
			}
			@@ -24,7 +34,6 @@
			convolutional_layer *make_convolutional_layer(int h, int w, int c, int n, int size, int stride, ACTIVATION activation)
			{
			int i;
			int out_h,out_w;
			size = 2*(size/2)+1; //HA! And you thought you'd use an even sized filter...
			convolutional_layer *layer = calloc(1, sizeof(convolutional_layer));
			layer->h = h;
			@@ -47,15 +56,13 @@
			//layer->biases[i] = rand_normal()*scale + scale;
			layer->biases[i] = 0;
			}
			out_h = (h-size)/stride + 1;
			out_w = (w-size)/stride + 1;
			int out_h = (h-size)/stride + 1;
			int out_w = (w-size)/stride + 1;

			layer->col_image = calloc(out_h*out_w*size*size*c, sizeof(float));
			layer->output = calloc(out_h * out_w * n, sizeof(float));
			layer->delta = calloc(out_h * out_w * n, sizeof(float));
			layer->activation = activation;
			layer->out_h = out_h;
			layer->out_w = out_w;

			fprintf(stderr, "Convolutional Layer: %d x %d x %d image, %d filters -> %d x %d x %d image\n", h,w,c,n, out_h, out_w, n);
			srand(0);
			@@ -90,7 +97,10 @@
			void gradient_delta_convolutional_layer(convolutional_layer layer)
			{
			int i;
			for(i = 0; i < layer.out_h*layer.out_w*layer.n; ++i){
			int size = convolutional_out_height(layer)
			*convolutional_out_width(layer)
			*layer.n;
			for(i = 0; i < size; ++i){
			layer.delta[i] *= gradient(layer.output[i], layer.activation);
			}
			}
			@@ -98,7 +108,8 @@
			void learn_bias_convolutional_layer(convolutional_layer layer)
			{
			int i,j;
			int size = layer.out_h*layer.out_w;
			int size = convolutional_out_height(layer)
			*convolutional_out_width(layer);
			for(i = 0; i < layer.n; ++i){
			float sum = 0;
			for(j = 0; j < size; ++j){

			@@ -6,7 +6,6 @@

			typedef struct {
			int h,w,c;
			int out_h, out_w, out_c;
			int n;
			int size;
			int stride;

			@@ -1,5 +1,4 @@
			#include "data.h"
			#include "list.h"
			#include "utils.h"
			#include "image.h"

			@@ -2,6 +2,7 @@
			#define DATA_H

			#include "matrix.h"
			#include "list.h"

			typedef struct{
			matrix X;
			@@ -16,6 +17,7 @@
			char **labels, int k, int h, int w);
			data load_data_image_pathfile_random(char *filename, int n, char **labels,
			int k, int h, int w);
			list *get_paths(char *filename);
			data load_categorical_data_csv(char *filename, int target, int k);
			void normalize_data_rows(data d);
			void scale_data_rows(data d, float s);

			@@ -342,21 +342,11 @@
			return outImg;
			}

			image load_image(char *filename, int h, int w)
			image ipl_to_image(IplImage* src)
			{
			IplImage* src = 0;
			if( (src = cvLoadImage(filename,-1)) == 0 )
			{
			printf("Cannot load file image %s\n", filename);
			exit(0);
			}
			cvShowImage("Orig", src);
			IplImage *resized = resizeImage(src, h, w, 1);
			cvShowImage("Sized", resized);
			cvWaitKey(0);
			cvReleaseImage(&src);
			src = resized;
			unsigned char *data = (unsigned char *)src->imageData;
			int h = src->height;
			int w = src->width;
			int c = src->nChannels;
			int step = src->widthStep;
			image out = make_image(h,w,c);
			@@ -369,6 +359,21 @@
			}
			}
			}
			return out;
			}

			image load_image(char *filename, int h, int w)
			{
			IplImage* src = 0;
			if( (src = cvLoadImage(filename,-1)) == 0 )
			{
			printf("Cannot load file image %s\n", filename);
			exit(0);
			}
			IplImage *resized = resizeImage(src, h, w, 1);
			cvReleaseImage(&src);
			src = resized;
			image out = ipl_to_image(src);
			cvReleaseImage(&src);
			return out;
			}

			@@ -34,6 +34,7 @@
			image float_to_image(int h, int w, int c, float *data);
			image copy_image(image p);
			image load_image(char *filename, int h, int w);
			image ipl_to_image(IplImage* src);

			float get_pixel(image m, int x, int y, int c);
			float get_pixel_extend(image m, int x, int y, int c);

			@@ -331,6 +331,34 @@
			return 0;
			}

			int reset_network_size(network net, int h, int w, int c)
			{
			int i;
			for (i = 0; i < net.n; ++i){
			if(net.types[i] == CONVOLUTIONAL){
			convolutional_layer *layer = (convolutional_layer *)net.layers[i];
			layer->h = h;
			layer->w = w;
			layer->c = c;
			image output = get_convolutional_image(*layer);
			h = output.h;
			w = output.w;
			c = output.c;
			}
			else if(net.types[i] == MAXPOOL){
			maxpool_layer *layer = (maxpool_layer *)net.layers[i];
			layer->h = h;
			layer->w = w;
			layer->c = c;
			image output = get_maxpool_image(*layer);
			h = output.h;
			w = output.w;
			c = output.c;
			}
			}
			return 0;
			}

			int get_network_output_size(network net)
			{
			int i = net.n-1;

			@@ -41,6 +41,7 @@
			void print_network(network net);
			void visualize_network(network net);
			void save_network(network net, char *filename);
			int reset_network_size(network net, int h, int w, int c);

			#endif

			@@ -366,20 +366,21 @@

			void train_VOC()
			{
			network net = parse_network_cfg("cfg/voc_backup_ramp_80.cfg");
			network net = parse_network_cfg("cfg/voc_backup_sig_20.cfg");
			srand(2222222);
			int i = 0;
			int i = 20;
			char *labels[] = {"aeroplane","bicycle","bird","boat","bottle","bus","car","cat","chair","cow","diningtable","dog","horse","motorbike","person","pottedplant","sheep","sofa","train","tvmonitor"};
			float lr = .00001;
			float momentum = .9;
			float decay = 0.01;
			while(i++ < 1000 || 1){
			visualize_network(net);
			cvWaitKey(100);
			data train = load_data_image_pathfile_random("images/VOC2012/train_paths.txt", 1000, labels, 20, 300, 400);

			image im = float_to_image(300, 400, 3,train.X.vals[0]);
			show_image(im, "input");
			visualize_network(net);
			cvWaitKey(100);

			normalize_data_rows(train);
			clock_t start = clock(), end;
			float loss = train_network_sgd(net, train, 1000, lr, momentum, decay);
			@@ -388,13 +389,61 @@
			free_data(train);
			if(i%10==0){
			char buff[256];
			sprintf(buff, "cfg/voc_backup_ramp_%d.cfg", i);
			sprintf(buff, "cfg/voc_backup_sig_%d.cfg", i);
			save_network(net, buff);
			}
			//lr *= .99;
			}
			}

			void features_VOC()
			{
			int i,j;
			network net = parse_network_cfg("cfg/voc_features.cfg");
			char *path_file = "images/VOC2012/all_paths.txt";
			char *out_dir = "voc_features/";
			list *paths = get_paths(path_file);
			node *n = paths->front;
			while(n){
			char *path = (char *)n->val;
			char buff[1024];
			sprintf(buff, "%s%s.txt",out_dir, path);
			FILE *fp = fopen(buff, "w");
			if(fp == 0) file_error(buff);

			IplImage* src = 0;
			if( (src = cvLoadImage(path,-1)) == 0 )
			{
			printf("Cannot load file image %s\n", path);
			exit(0);
			}

			for(i = 0; i < 10; ++i){
			int w = 1024 - 90*i; //PICKED WITH CAREFUL CROSS-VALIDATION!!!!
			int h = (int)((double)w/src->width * src->height);
			IplImage *sized = cvCreateImage(cvSize(w,h), src->depth, src->nChannels);
			cvResize(src, sized, CV_INTER_LINEAR);
			image im = ipl_to_image(sized);
			reset_network_size(net, im.h, im.w, im.c);
			forward_network(net, im.data);
			free_image(im);
			image out = get_network_image_layer(net, 5);
			fprintf(fp, "%d, %d, %d\n",out.c, out.h, out.w);
			for(j = 0; j < out.c*out.h*out.w; ++j){
			if(j != 0)fprintf(fp, ",");
			fprintf(fp, "%g", out.data[j]);
			}
			fprintf(fp, "\n");
			out.c = 1;
			show_image(out, "output");
			cvWaitKey(10);
			cvReleaseImage(&sized);
			}
			fclose(fp);
			n = n->next;
			}
			}

			int main()
			{
			//feenableexcept(FE_DIVBYZERO | FE_INVALID | FE_OVERFLOW);
			@@ -406,7 +455,8 @@
			//test_ensemble();
			//test_nist();
			//test_full();
			train_VOC();
			//train_VOC();
			features_VOC();
			//test_random_preprocess();
			//test_random_classify();
			//test_parser();