| | |
| | | if(layer.type == DETECTION){ |
| | | int i; |
| | | for(i = 0; i < layer.batch*layer.inputs; ++i){ |
| | | if((i%5) && !truth[(i/5)*5]) layer.delta[i] = 0; |
| | | if((i%25) && !truth[(i/25)*25]) layer.delta[i] = 0; |
| | | } |
| | | } |
| | | *(layer.output) = dot_cpu(layer.batch*layer.inputs, layer.delta, 1, layer.delta, 1); |
| | |
| | | axpy_ongpu(layer.batch*layer.inputs, -1, input, 1, layer.delta_gpu, 1); |
| | | |
| | | if(layer.type==DETECTION){ |
| | | mask_ongpu(layer.inputs*layer.batch, layer.delta_gpu, truth, 5); |
| | | mask_ongpu(layer.inputs*layer.batch, layer.delta_gpu, truth, 25); |
| | | } |
| | | |
| | | cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*layer.inputs); |
| | |
| | | save_network(net, "cfg/trained_imagenet_smaller.cfg"); |
| | | } |
| | | |
| | | char *class_names[] = {"aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"}; |
| | | #define AMNT 3 |
| | | void draw_detection(image im, float *box, int side) |
| | | { |
| | | int classes = 20; |
| | | int elems = 4+classes+1; |
| | | int j; |
| | | int r, c; |
| | | float amount[AMNT] = {0}; |
| | | for(r = 0; r < side*side; ++r){ |
| | | float val = box[r*5]; |
| | | float val = box[r*elems]; |
| | | for(j = 0; j < AMNT; ++j){ |
| | | if(val > amount[j]) { |
| | | float swap = val; |
| | |
| | | |
| | | for(r = 0; r < side; ++r){ |
| | | for(c = 0; c < side; ++c){ |
| | | j = (r*side + c) * 5; |
| | | printf("Prob: %f\n", box[j]); |
| | | j = (r*side + c) * elems; |
| | | //printf("%d\n", j); |
| | | //printf("Prob: %f\n", box[j]); |
| | | if(box[j] >= smallest){ |
| | | int class = max_index(box+j+1, classes); |
| | | int z; |
| | | for(z = 0; z < classes; ++z) printf("%f %s\n", box[j+1+z], class_names[z]); |
| | | printf("%f %s\n", box[j+1+class], class_names[class]); |
| | | float red = get_color(0,class,classes); |
| | | float green = get_color(1,class,classes); |
| | | float blue = get_color(2,class,classes); |
| | | |
| | | j += classes; |
| | | int d = im.w/side; |
| | | int y = r*d+box[j+1]*d; |
| | | int x = c*d+box[j+2]*d; |
| | | int h = box[j+3]*im.h; |
| | | int w = box[j+4]*im.w; |
| | | //printf("%f %f %f %f\n", box[j+1], box[j+2], box[j+3], box[j+4]); |
| | | //printf("%d %d %d %d\n", x, y, w, h); |
| | | //printf("%d %d %d %d\n", x-w/2, y-h/2, x+w/2, y+h/2); |
| | | draw_box(im, x-w/2, y-h/2, x+w/2, y+h/2); |
| | | draw_box(im, x-w/2, y-h/2, x+w/2, y+h/2,red,green,blue); |
| | | } |
| | | } |
| | | } |
| | | //printf("Done\n"); |
| | | show_image(im, "box"); |
| | | cvWaitKey(0); |
| | | } |
| | |
| | | srand(time(0)); |
| | | //srand(23410); |
| | | int i = net.seen/imgs; |
| | | list *plist = get_paths("/home/pjreddie/data/imagenet/horse_pos.txt"); |
| | | list *plist = get_paths("/home/pjreddie/data/voc/train.txt"); |
| | | char **paths = (char **)list_to_array(plist); |
| | | printf("%d\n", plist->size); |
| | | data train, buffer; |
| | | int im_dim = 512; |
| | | int jitter = 64; |
| | | pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, im_dim, im_dim, 7, 7, jitter, &buffer); |
| | | pthread_t load_thread = load_data_detection_thread(imgs, paths, plist->size, 20, im_dim, im_dim, 7, 7, jitter, &buffer); |
| | | clock_t time; |
| | | while(1){ |
| | | i += 1; |
| | | time=clock(); |
| | | pthread_join(load_thread, 0); |
| | | train = buffer; |
| | | load_thread = load_data_detection_thread(imgs, paths, plist->size, im_dim, im_dim, 7, 7, jitter, &buffer); |
| | | load_thread = load_data_detection_thread(imgs, paths, plist->size, 20, im_dim, im_dim, 7, 7, jitter, &buffer); |
| | | |
| | | /* |
| | | image im = float_to_image(im_dim - jitter, im_dim-jitter, 3, train.X.vals[923]); |
| | | draw_detection(im, train.y.vals[923], 7); |
| | | /* |
| | | image im = float_to_image(im_dim - jitter, im_dim-jitter, 3, train.X.vals[0]); |
| | | draw_detection(im, train.y.vals[0], 7); |
| | | show_image(im, "truth"); |
| | | cvWaitKey(0); |
| | | */ |
| | |
| | | net.seen += imgs; |
| | | avg_loss = avg_loss*.9 + loss*.1; |
| | | printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs); |
| | | if(i%100==0){ |
| | | if(i%800==0){ |
| | | char buff[256]; |
| | | sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); |
| | | save_weights(net, buff); |
| | |
| | | fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); |
| | | srand(time(0)); |
| | | |
| | | list *plist = get_paths("/home/pjreddie/data/imagenet/detection.val"); |
| | | list *plist = get_paths("/home/pjreddie/data/voc/val.txt"); |
| | | char **paths = (char **)list_to_array(plist); |
| | | int num_output = 1225; |
| | | int im_size = 448; |
| | | int classes = 20; |
| | | |
| | | int m = plist->size; |
| | | int i = 0; |
| | | int splits = 50; |
| | | int splits = 100; |
| | | int num = (i+1)*m/splits - i*m/splits; |
| | | |
| | | fprintf(stderr, "%d\n", m); |
| | | data val, buffer; |
| | | pthread_t load_thread = load_data_thread(paths, num, 0, 0, 245, 224, 224, &buffer); |
| | | pthread_t load_thread = load_data_thread(paths, num, 0, 0, num_output, im_size, im_size, &buffer); |
| | | clock_t time; |
| | | for(i = 1; i <= splits; ++i){ |
| | | time=clock(); |
| | |
| | | |
| | | num = (i+1)*m/splits - i*m/splits; |
| | | char **part = paths+(i*m/splits); |
| | | if(i != splits) load_thread = load_data_thread(part, num, 0, 0, 245, 224, 224, &buffer); |
| | | if(i != splits) load_thread = load_data_thread(part, num, 0, 0, num_output, im_size, im_size, &buffer); |
| | | |
| | | fprintf(stderr, "Loaded: %lf seconds\n", sec(clock()-time)); |
| | | fprintf(stderr, "%d: Loaded: %lf seconds\n", i, sec(clock()-time)); |
| | | matrix pred = network_predict_data(net, val); |
| | | int j, k; |
| | | int j, k, class; |
| | | for(j = 0; j < pred.rows; ++j){ |
| | | for(k = 0; k < pred.cols; k += 5){ |
| | | if (pred.vals[j][k] > .005){ |
| | | int index = k/5; |
| | | for(k = 0; k < pred.cols; k += classes+4+1){ |
| | | |
| | | /* |
| | | int z; |
| | | for(z = 0; z < 25; ++z) printf("%f, ", pred.vals[j][k+z]); |
| | | printf("\n"); |
| | | */ |
| | | |
| | | float p = pred.vals[j][k]; |
| | | //if (pred.vals[j][k] > .001){ |
| | | for(class = 0; class < classes; ++class){ |
| | | int index = (k)/(classes+4+1); |
| | | int r = index/7; |
| | | int c = index%7; |
| | | float y = (32.*(r + pred.vals[j][k+1]))/224.; |
| | | float x = (32.*(c + pred.vals[j][k+2]))/224.; |
| | | float h = (256.*(pred.vals[j][k+3]))/224.; |
| | | float w = (256.*(pred.vals[j][k+4]))/224.; |
| | | printf("%d %f %f %f %f %f\n", (i-1)*m/splits + j + 1, pred.vals[j][k], y, x, h, w); |
| | | float y = (r + pred.vals[j][k+1+classes])/7.; |
| | | float x = (c + pred.vals[j][k+2+classes])/7.; |
| | | float h = pred.vals[j][k+3+classes]; |
| | | float w = pred.vals[j][k+4+classes]; |
| | | printf("%d %d %f %f %f %f %f\n", (i-1)*m/splits + j, class, p*pred.vals[j][k+class+1], y, x, h, w); |
| | | } |
| | | //} |
| | | } |
| | | } |
| | | |
| | |
| | | } |
| | | /* |
| | | |
| | | void train_imagenet_distributed(char *address) |
| | | { |
| | | float avg_loss = 1; |
| | | srand(time(0)); |
| | | network net = parse_network_cfg("cfg/net.cfg"); |
| | | set_learning_network(&net, 0, 1, 0); |
| | | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); |
| | | int imgs = net.batch; |
| | | int i = 0; |
| | | char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list"); |
| | | list *plist = get_paths("/data/imagenet/cls.train.list"); |
| | | char **paths = (char **)list_to_array(plist); |
| | | printf("%d\n", plist->size); |
| | | clock_t time; |
| | | data train, buffer; |
| | | pthread_t load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer); |
| | | while(1){ |
| | | i += 1; |
| | | void train_imagenet_distributed(char *address) |
| | | { |
| | | float avg_loss = 1; |
| | | srand(time(0)); |
| | | network net = parse_network_cfg("cfg/net.cfg"); |
| | | set_learning_network(&net, 0, 1, 0); |
| | | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); |
| | | int imgs = net.batch; |
| | | int i = 0; |
| | | char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list"); |
| | | list *plist = get_paths("/data/imagenet/cls.train.list"); |
| | | char **paths = (char **)list_to_array(plist); |
| | | printf("%d\n", plist->size); |
| | | clock_t time; |
| | | data train, buffer; |
| | | pthread_t load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer); |
| | | while(1){ |
| | | i += 1; |
| | | |
| | | time=clock(); |
| | | client_update(net, address); |
| | | printf("Updated: %lf seconds\n", sec(clock()-time)); |
| | | time=clock(); |
| | | client_update(net, address); |
| | | printf("Updated: %lf seconds\n", sec(clock()-time)); |
| | | |
| | | time=clock(); |
| | | pthread_join(load_thread, 0); |
| | | train = buffer; |
| | | normalize_data_rows(train); |
| | | load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer); |
| | | printf("Loaded: %lf seconds\n", sec(clock()-time)); |
| | | time=clock(); |
| | | time=clock(); |
| | | pthread_join(load_thread, 0); |
| | | train = buffer; |
| | | normalize_data_rows(train); |
| | | load_thread = load_data_thread(paths, imgs, plist->size, labels, 1000, 224, 224, &buffer); |
| | | printf("Loaded: %lf seconds\n", sec(clock()-time)); |
| | | time=clock(); |
| | | |
| | | float loss = train_network(net, train); |
| | | avg_loss = avg_loss*.9 + loss*.1; |
| | | printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs); |
| | | free_data(train); |
| | | } |
| | | } |
| | | */ |
| | | float loss = train_network(net, train); |
| | | avg_loss = avg_loss*.9 + loss*.1; |
| | | printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs); |
| | | free_data(train); |
| | | } |
| | | } |
| | | */ |
| | | |
| | | void convert(char *cfgfile, char *outfile, char *weightfile) |
| | | { |
| | |
| | | save_network(net, outfile); |
| | | } |
| | | |
| | | void train_captcha(char *cfgfile, char *weightfile) |
| | | { |
| | | float avg_loss = -1; |
| | | srand(time(0)); |
| | | char *base = basename(cfgfile); |
| | | printf("%s\n", base); |
| | | network net = parse_network_cfg(cfgfile); |
| | | if(weightfile){ |
| | | load_weights(&net, weightfile); |
| | | } |
| | | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); |
| | | int imgs = 1024; |
| | | int i = net.seen/imgs; |
| | | list *plist = get_paths("/data/captcha/train.list"); |
| | | char **paths = (char **)list_to_array(plist); |
| | | printf("%d\n", plist->size); |
| | | clock_t time; |
| | | while(1){ |
| | | ++i; |
| | | time=clock(); |
| | | data train = load_data_captcha(paths, imgs, plist->size, 10, 60, 200); |
| | | translate_data_rows(train, -128); |
| | | scale_data_rows(train, 1./128); |
| | | printf("Loaded: %lf seconds\n", sec(clock()-time)); |
| | | time=clock(); |
| | | float loss = train_network(net, train); |
| | | net.seen += imgs; |
| | | if(avg_loss == -1) avg_loss = loss; |
| | | avg_loss = avg_loss*.9 + loss*.1; |
| | | printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); |
| | | free_data(train); |
| | | if(i%100==0){ |
| | | char buff[256]; |
| | | sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); |
| | | save_weights(net, buff); |
| | | } |
| | | } |
| | | } |
| | | |
| | | |
| | | void validate_captcha(char *cfgfile, char *weightfile) |
| | | { |
| | | srand(time(0)); |
| | | char *base = basename(cfgfile); |
| | | printf("%s\n", base); |
| | | network net = parse_network_cfg(cfgfile); |
| | | if(weightfile){ |
| | | load_weights(&net, weightfile); |
| | | } |
| | | int imgs = 1000; |
| | | int numchars = 37; |
| | | list *plist = get_paths("/data/captcha/valid.list"); |
| | | char **paths = (char **)list_to_array(plist); |
| | | data valid = load_data_captcha(paths, imgs, 0, 10, 60, 200); |
| | | translate_data_rows(valid, -128); |
| | | scale_data_rows(valid, 1./128); |
| | | matrix pred = network_predict_data(net, valid); |
| | | int i, k; |
| | | int correct = 0; |
| | | int total = 0; |
| | | int accuracy = 0; |
| | | for(i = 0; i < imgs; ++i){ |
| | | int allcorrect = 1; |
| | | for(k = 0; k < 10; ++k){ |
| | | char truth = int_to_alphanum(max_index(valid.y.vals[i]+k*numchars, numchars)); |
| | | char prediction = int_to_alphanum(max_index(pred.vals[i]+k*numchars, numchars)); |
| | | if (truth != prediction) allcorrect=0; |
| | | if (truth != '.' && truth == prediction) ++correct; |
| | | if (truth != '.' || truth != prediction) ++total; |
| | | } |
| | | accuracy += allcorrect; |
| | | } |
| | | printf("Word Accuracy: %f, Char Accuracy %f\n", (float)accuracy/imgs, (float)correct/total); |
| | | free_data(valid); |
| | | } |
| | | |
| | | void test_captcha(char *cfgfile, char *weightfile) |
| | | { |
| | | srand(time(0)); |
| | | char *base = basename(cfgfile); |
| | | printf("%s\n", base); |
| | | network net = parse_network_cfg(cfgfile); |
| | | set_batch_network(&net, 1); |
| | | if(weightfile){ |
| | | load_weights(&net, weightfile); |
| | | } |
| | | clock_t time; |
| | | char filename[256]; |
| | | while(1){ |
| | | printf("Enter filename: "); |
| | | fgets(filename, 256, stdin); |
| | | strtok(filename, "\n"); |
| | | time = clock(); |
| | | image im = load_image_color(filename, 60, 200); |
| | | translate_image(im, -128); |
| | | scale_image(im, 1/128.); |
| | | float *X = im.data; |
| | | time=clock(); |
| | | float *predictions = network_predict(net, X); |
| | | printf("Predicted in %f\n", sec(clock() - time)); |
| | | print_letters(predictions, 10); |
| | | free_image(im); |
| | | } |
| | | } |
| | | |
| | | void train_imagenet(char *cfgfile, char *weightfile) |
| | | { |
| | | float avg_loss = -1; |
| | |
| | | if(weightfile){ |
| | | load_weights(&net, weightfile); |
| | | } |
| | | int im_size = 224; |
| | | set_batch_network(&net, 1); |
| | | srand(2222222); |
| | | clock_t time; |
| | |
| | | while(1){ |
| | | fgets(filename, 256, stdin); |
| | | strtok(filename, "\n"); |
| | | image im = load_image_color(filename, 224, 224); |
| | | image im = load_image_color(filename, im_size, im_size); |
| | | translate_image(im, -128); |
| | | scale_image(im, 1/128.); |
| | | printf("%d %d %d\n", im.h, im.w, im.c); |
| | |
| | | else if(0==strcmp(argv[1], "nist")) train_nist(argv[2]); |
| | | else if(0==strcmp(argv[1], "ctest")) test_cifar10(argv[2]); |
| | | else if(0==strcmp(argv[1], "train")) train_imagenet(argv[2], (argc > 3)? argv[3] : 0); |
| | | else if(0==strcmp(argv[1], "captcha")) train_captcha(argv[2], (argc > 3)? argv[3] : 0); |
| | | else if(0==strcmp(argv[1], "tcaptcha")) test_captcha(argv[2], (argc > 3)? argv[3] : 0); |
| | | else if(0==strcmp(argv[1], "vcaptcha")) validate_captcha(argv[2], (argc > 3)? argv[3] : 0); |
| | | else if(0==strcmp(argv[1], "testseg")) test_voc_segment(argv[2], (argc > 3)? argv[3] : 0); |
| | | //else if(0==strcmp(argv[1], "client")) train_imagenet_distributed(argv[2]); |
| | | else if(0==strcmp(argv[1], "detect")) test_detection(argv[2], (argc > 3)? argv[3] : 0); |
| | |
| | | int nh; |
| | | int nw; |
| | | int jitter; |
| | | int classes; |
| | | data *d; |
| | | }; |
| | | |
| | |
| | | return lines; |
| | | } |
| | | |
| | | void fill_truth_detection(char *path, float *truth, int height, int width, int num_height, int num_width, int dy, int dx, int jitter) |
| | | char **get_random_paths(char **paths, int n, int m) |
| | | { |
| | | int box_height = height/num_height; |
| | | int box_width = width/num_width; |
| | | char *labelpath = find_replace(path, "imgs", "det/train"); |
| | | labelpath = find_replace(labelpath, ".JPEG", ".txt"); |
| | | FILE *file = fopen(labelpath, "r"); |
| | | if(!file) file_error(labelpath); |
| | | float x, y, h, w; |
| | | while(fscanf(file, "%f %f %f %f", &x, &y, &w, &h) == 4){ |
| | | x *= width + jitter; |
| | | y *= height + jitter; |
| | | x -= dx; |
| | | y -= dy; |
| | | int i = x/box_width; |
| | | int j = y/box_height; |
| | | |
| | | if(i < 0) i = 0; |
| | | if(i >= num_width) i = num_width-1; |
| | | if(j < 0) j = 0; |
| | | if(j >= num_height) j = num_height-1; |
| | | |
| | | float dw = (x - i*box_width)/box_width; |
| | | float dh = (y - j*box_height)/box_height; |
| | | //printf("%d %d %f %f\n", i, j, dh, dw); |
| | | int index = (i+j*num_width)*5; |
| | | truth[index++] = 1; |
| | | truth[index++] = dh; |
| | | truth[index++] = dw; |
| | | truth[index++] = h*(height+jitter)/height; |
| | | truth[index++] = w*(width+jitter)/width; |
| | | } |
| | | fclose(file); |
| | | } |
| | | |
| | | void fill_truth(char *path, char **labels, int k, float *truth) |
| | | { |
| | | char **random_paths = calloc(n, sizeof(char*)); |
| | | int i; |
| | | memset(truth, 0, k*sizeof(float)); |
| | | int count = 0; |
| | | for(i = 0; i < k; ++i){ |
| | | if(strstr(path, labels[i])){ |
| | | truth[i] = 1; |
| | | ++count; |
| | | } |
| | | for(i = 0; i < n; ++i){ |
| | | int index = rand()%m; |
| | | random_paths[i] = paths[index]; |
| | | if(i == 0) printf("%s\n", paths[index]); |
| | | } |
| | | if(count != 1) printf("%d, %s\n", count, path); |
| | | return random_paths; |
| | | } |
| | | |
| | | matrix load_image_paths(char **paths, int n, int h, int w) |
| | |
| | | return X; |
| | | } |
| | | |
| | | char **get_random_paths(char **paths, int n, int m) |
/*
 * Fill a detection ground-truth vector from the label file that belongs to
 * an image.
 *
 * path:        image path; the label path is derived by replacing
 *              "VOC2012/JPEGImages" with "labels" and ".jpg" with ".txt".
 * truth:       output vector of num_height*num_width cells, each holding
 *              4+classes+1 floats laid out as
 *              [object flag, one slot per class, dh, dw, h, w].
 *              Only matched cells are written — presumably the caller passes
 *              a zeroed buffer; confirm against the caller.
 * classes:     number of object classes.
 * height/width:        size of the cropped image the grid is laid over.
 * num_height/num_width: grid dimensions.
 * dy/dx:       crop offset subtracted from the scaled box centre.
 * jitter:      extra pixels added to height/width before scaling the
 *              coordinates read from the file.
 * flip:        non-zero mirrors the x coordinate (x = 1-x).
 *
 * Label lines have the form "id x y w h". Lines whose id lies outside
 * [0, classes) are skipped so they cannot write outside the cell.
 */
void fill_truth_detection(char *path, float *truth, int classes, int height, int width, int num_height, int num_width, int dy, int dx, int jitter, int flip)
{
    int box_height = height/num_height;
    int box_width = width/num_width;
    char *labelpath = find_replace(path, "VOC2012/JPEGImages", "labels");
    labelpath = find_replace(labelpath, ".jpg", ".txt");
    FILE *file = fopen(labelpath, "r");
    if(!file){
        file_error(labelpath);
        return;   /* never touch a NULL stream should file_error() return */
    }
    float x, y, h, w;
    int id;
    while(fscanf(file, "%d %f %f %f %f", &id, &x, &y, &w, &h) == 5){
        /* The id indexes the per-class slots; reject values that would land outside them. */
        if(id < 0 || id >= classes) continue;
        if(flip) x = 1-x;
        x *= width + jitter;
        y *= height + jitter;
        x -= dx;
        y -= dy;
        int i = x/box_width;
        int j = y/box_height;

        /* Centres pushed outside the crop are assigned to the nearest border cell. */
        if(i < 0) i = 0;
        if(i >= num_width) i = num_width-1;
        if(j < 0) j = 0;
        if(j >= num_height) j = num_height-1;

        /* Offset of the centre inside its cell, in cell units. */
        float dw = (x - i*box_width)/box_width;
        float dh = (y - j*box_height)/box_height;
        //printf("%d %d %d %f %f\n", id, i, j, dh, dw);
        int index = (i+j*num_width)*(4+classes+1);
        truth[index++] = 1;
        truth[index+id] = 1;
        index += classes;
        truth[index++] = dh;
        truth[index++] = dw;
        truth[index++] = h*(height+jitter)/height;
        truth[index++] = w*(width+jitter)/width;
    }
    fclose(file);
}
| | | |
| | | #define NUMCHARS 37 |
| | | |
| | | void print_letters(float *pred, int n) |
| | | { |
| | | int i; |
| | | for(i = 0; i < n; ++i){ |
| | | int index = rand()%m; |
| | | random_paths[i] = paths[index]; |
| | | if(i == 0) printf("%s\n", paths[index]); |
| | | int index = max_index(pred+i*NUMCHARS, NUMCHARS); |
| | | printf("%c", int_to_alphanum(index)); |
| | | } |
| | | return random_paths; |
| | | printf("\n"); |
| | | } |
| | | |
| | | void fill_truth_captcha(char *path, int n, float *truth) |
| | | { |
| | | char *begin = strrchr(path, '/'); |
| | | ++begin; |
| | | int i; |
| | | for(i = 0; i < strlen(begin) && i < n && begin[i] != '.'; ++i){ |
| | | int index = alphanum_to_int(begin[i]); |
| | | if(index > 35) printf("Bad %c\n", begin[i]); |
| | | truth[i*NUMCHARS+index] = 1; |
| | | } |
| | | for(;i < n; ++i){ |
| | | truth[i*NUMCHARS + NUMCHARS-1] = 1; |
| | | } |
| | | } |
| | | |
| | | data load_data_captcha(char **paths, int n, int m, int k, int h, int w) |
| | | { |
| | | if(m) paths = get_random_paths(paths, n, m); |
| | | data d; |
| | | d.shallow = 0; |
| | | d.X = load_image_paths(paths, n, h, w); |
| | | d.y = make_matrix(n, k*NUMCHARS); |
| | | int i; |
| | | for(i = 0; i < n; ++i){ |
| | | fill_truth_captcha(paths[i], k, d.y.vals[i]); |
| | | } |
| | | if(m) free(paths); |
| | | return d; |
| | | } |
| | | |
| | | |
/*
 * Build a label vector by substring-matching label names against a path.
 *
 * path:   image path to inspect.
 * labels: k label strings.
 * k:      number of labels, and length of truth.
 * truth:  output; zeroed, then truth[i] is set to 1 for every label that
 *         occurs as a substring of path.
 *
 * Prints the match count and the path whenever the count is not exactly one.
 */
void fill_truth(char *path, char **labels, int k, float *truth)
{
    int matches = 0;
    int idx;
    memset(truth, 0, k*sizeof(float));
    for(idx = 0; idx < k; ++idx){
        if(!strstr(path, labels[idx])) continue;
        truth[idx] = 1;
        ++matches;
    }
    if(matches != 1) printf("%d, %s\n", matches, path);
}
| | | |
| | | matrix load_labels_paths(char **paths, int n, char **labels, int k) |
| | |
| | | return y; |
| | | } |
| | | |
| | | matrix load_labels_detection(char **paths, int n, int height, int width, int num_height, int num_width) |
| | | { |
| | | int k = num_height*num_width*5; |
| | | matrix y = make_matrix(n, k); |
| | | int i; |
| | | for(i = 0; i < n; ++i){ |
| | | fill_truth_detection(paths[i], y.vals[i], height, width, num_height, num_width, 0, 0, 0); |
| | | } |
| | | return y; |
| | | } |
| | | |
| | | data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w) |
| | | { |
| | | list *plist = get_paths(filename); |
| | |
| | | } |
| | | } |
| | | |
| | | data load_data_detection_jitter_random(int n, char **paths, int m, int h, int w, int nh, int nw, int jitter) |
| | | data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter) |
| | | { |
| | | char **random_paths = get_random_paths(paths, n, m); |
| | | int i; |
| | | data d; |
| | | d.shallow = 0; |
| | | d.X = load_image_paths(random_paths, n, h, w); |
| | | int k = nh*nw*5; |
| | | int k = nh*nw*(4+classes+1); |
| | | d.y = make_matrix(n, k); |
| | | for(i = 0; i < n; ++i){ |
| | | int dx = rand()%jitter; |
| | | int dy = rand()%jitter; |
| | | fill_truth_detection(random_paths[i], d.y.vals[i], h-jitter, w-jitter, nh, nw, dy, dx, jitter); |
| | | int flip = rand()%2; |
| | | fill_truth_detection(random_paths[i], d.y.vals[i], classes, h-jitter, w-jitter, nh, nw, dy, dx, jitter, flip); |
| | | image a = float_to_image(h, w, 3, d.X.vals[i]); |
| | | if(flip) flip_image(a); |
| | | jitter_image(a,h-jitter,w-jitter,dy,dx); |
| | | } |
| | | d.X.cols = (h-jitter)*(w-jitter)*3; |
| | |
| | | void *load_detection_thread(void *ptr) |
| | | { |
| | | struct load_args a = *(struct load_args*)ptr; |
| | | *a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.h, a.w, a.nh, a.nw, a.jitter); |
| | | *a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.classes, a.h, a.w, a.nh, a.nw, a.jitter); |
| | | translate_data_rows(*a.d, -128); |
| | | scale_data_rows(*a.d, 1./128); |
| | | free(ptr); |
| | | return 0; |
| | | } |
| | | |
| | | pthread_t load_data_detection_thread(int n, char **paths, int m, int h, int w, int nh, int nw, int jitter, data *d) |
| | | pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, data *d) |
| | | { |
| | | pthread_t thread; |
| | | struct load_args *args = calloc(1, sizeof(struct load_args)); |
| | |
| | | args->w = w; |
| | | args->nh = nh; |
| | | args->nw = nw; |
| | | args->classes = classes; |
| | | args->jitter = jitter; |
| | | args->d = d; |
| | | if(pthread_create(&thread, 0, load_detection_thread, args)) { |
| | |
| | | return thread; |
| | | } |
| | | |
| | | data load_data_detection_random(int n, char **paths, int m, int h, int w, int nh, int nw) |
| | | { |
| | | char **random_paths = get_random_paths(paths, n, m); |
| | | data d; |
| | | d.shallow = 0; |
| | | d.X = load_image_paths(random_paths, n, h, w); |
| | | d.y = load_labels_detection(random_paths, n, h, w, nh, nw); |
| | | free(random_paths); |
| | | return d; |
| | | } |
| | | |
| | | data load_data(char **paths, int n, int m, char **labels, int k, int h, int w) |
| | | { |
| | | if(m) paths = get_random_paths(paths, n, m); |
| | |
| | | |
| | | void free_data(data d); |
| | | |
| | | void print_letters(float *pred, int n); |
| | | data load_data_captcha(char **paths, int n, int m, int k, int h, int w); |
| | | data load_data(char **paths, int n, int m, char **labels, int k, int h, int w); |
| | | pthread_t load_data_thread(char **paths, int n, int m, char **labels, int k, int h, int w, data *d); |
| | | |
| | | pthread_t load_data_detection_thread(int n, char **paths, int m, int h, int w, int nh, int nw, int jitter, data *d); |
| | | data load_data_detection_jitter_random(int n, char **paths, int m, int h, int w, int nh, int nw, int jitter); |
| | | data load_data_detection_random(int n, char **paths, int m, int h, int w, int nh, int nw); |
| | | pthread_t load_data_detection_thread(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter, data *d); |
| | | data load_data_detection_jitter_random(int n, char **paths, int m, int classes, int h, int w, int nh, int nw, int jitter); |
| | | |
| | | data load_data_image_pathfile(char *filename, char **labels, int k, int h, int w); |
| | | data load_cifar10_data(char *filename); |
| | |
| | | |
| | | int windows = 0; |
| | | |
| | | void draw_box(image a, int x1, int y1, int x2, int y2) |
/* Six anchor colours; get_color() interpolates between neighbouring rows. */
float colors[6][3] = { {1,0,1}, {0,0,1},{0,1,1},{0,1,0},{1,1,0},{1,0,0} };

/*
 * Return one colour component for item x out of max, interpolated linearly
 * along the rows of the colors table.
 *
 * c:   column of the colors table to read (0..2); not range-checked.
 * x:   item index; values outside [0, max] are clamped to the table ends.
 * max: number of items; a non-positive value selects the first table row.
 */
float get_color(int c, int x, int max)
{
    float ratio = (max > 0) ? ((float)x/max)*5 : 0;
    /* Keep both row indices inside the six-row table. */
    if(ratio < 0) ratio = 0;
    if(ratio > 5) ratio = 5;
    int i = (int)ratio;                 /* floor, since ratio >= 0 */
    int j = (ratio > i) ? i + 1 : i;    /* ceil */
    ratio -= i;
    float r = (1-ratio) * colors[i][c] + ratio*colors[j][c];
    return r;
}
| | | |
| | | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b) |
| | | { |
| | | normalize_image(a); |
| | | int i; |
| | | if(x1 < 0) x1 = 0; |
| | | if(x1 >= a.w) x1 = a.w-1; |
| | | if(x2 < 0) x2 = 0; |
| | |
| | | if(y2 < 0) y2 = 0; |
| | | if(y2 >= a.h) y2 = a.h-1; |
| | | |
| | | for(c = 0; c < a.c; ++c){ |
| | | for(i = x1; i < x2; ++i){ |
| | | a.data[i + y1*a.w + c*a.w*a.h] = (c==0)?1:-1; |
| | | a.data[i + y2*a.w + c*a.w*a.h] = (c==0)?1:-1; |
| | | } |
| | | for(i = x1; i < x2; ++i){ |
| | | a.data[i + y1*a.w + 0*a.w*a.h] = b; |
| | | a.data[i + y2*a.w + 0*a.w*a.h] = b; |
| | | |
| | | a.data[i + y1*a.w + 1*a.w*a.h] = g; |
| | | a.data[i + y2*a.w + 1*a.w*a.h] = g; |
| | | |
| | | a.data[i + y1*a.w + 2*a.w*a.h] = r; |
| | | a.data[i + y2*a.w + 2*a.w*a.h] = r; |
| | | } |
| | | for(c = 0; c < a.c; ++c){ |
| | | for(i = y1; i < y2; ++i){ |
| | | a.data[x1 + i*a.w + c*a.w*a.h] = (c==0)?1:-1; |
| | | a.data[x2 + i*a.w + c*a.w*a.h] = (c==0)?1:-1; |
| | | } |
| | | for(i = y1; i < y2; ++i){ |
| | | a.data[x1 + i*a.w + 0*a.w*a.h] = b; |
| | | a.data[x2 + i*a.w + 0*a.w*a.h] = b; |
| | | |
| | | a.data[x1 + i*a.w + 1*a.w*a.h] = g; |
| | | a.data[x2 + i*a.w + 1*a.w*a.h] = g; |
| | | |
| | | a.data[x1 + i*a.w + 2*a.w*a.h] = r; |
| | | a.data[x2 + i*a.w + 2*a.w*a.h] = r; |
| | | } |
| | | } |
| | | |
| | |
| | | } |
| | | } |
| | | |
| | | void flip_image(image a) |
| | | { |
| | | int i,j,k; |
| | | for(k = 0; k < a.c; ++k){ |
| | | for(i = 0; i < a.h; ++i){ |
| | | for(j = 0; j < a.w/2; ++j){ |
| | | int index = j + a.w*(i + a.h*(k)); |
| | | int flip = (a.w - j - 1) + a.w*(i + a.h*(k)); |
| | | float swap = a.data[flip]; |
| | | a.data[flip] = a.data[index]; |
| | | a.data[index] = swap; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | |
| | | image image_distance(image a, image b) |
| | | { |
| | | int i,j; |
| | |
| | | float *data; |
| | | } image; |
| | | |
| | | float get_color(int c, int x, int max); |
| | | void jitter_image(image a, int h, int w, int dh, int dw); |
| | | void draw_box(image a, int x1, int y1, int x2, int y2); |
| | | void flip_image(image a); |
| | | void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b); |
| | | image image_distance(image a, image b); |
| | | void scale_image(image m, float s); |
| | | void translate_image(image m, float s); |
| | |
| | | |
| | | extern "C" float * get_network_output_gpu_layer(network net, int i); |
| | | extern "C" float * get_network_delta_gpu_layer(network net, int i); |
| | | float *get_network_output_gpu(network net); |
| | | |
| | | void forward_network_gpu(network net, float * input, float * truth, int train) |
| | | { |
| | |
| | | //time = clock(); |
| | | update_network_gpu(net); |
| | | float error = get_network_cost(net); |
| | | |
| | | //print_letters(y, 50); |
| | | //float *out = get_network_output_gpu(net); |
| | | //print_letters(out, 50); |
| | | //printf("updt %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | return error; |
| | |
| | | softmax_layer *parse_softmax(list *options, network *net, int count) |
| | | { |
| | | int input; |
| | | int groups = option_find_int(options, "groups",1); |
| | | if(count == 0){ |
| | | input = option_find_int(options, "input",1); |
| | | net->batch = option_find_int(options, "batch",1); |
| | |
| | | }else{ |
| | | input = get_network_output_size_layer(*net, count-1); |
| | | } |
| | | softmax_layer *layer = make_softmax_layer(net->batch, input); |
| | | softmax_layer *layer = make_softmax_layer(net->batch, groups, input); |
| | | option_unused(options); |
| | | return layer; |
| | | } |
| | |
| | | #include <math.h> |
| | | #include <stdlib.h> |
| | | #include <stdio.h> |
| | | #include <assert.h> |
| | | |
| | | softmax_layer *make_softmax_layer(int batch, int inputs) |
| | | softmax_layer *make_softmax_layer(int batch, int groups, int inputs) |
| | | { |
| | | assert(inputs%groups == 0); |
| | | fprintf(stderr, "Softmax Layer: %d inputs\n", inputs); |
| | | softmax_layer *layer = calloc(1, sizeof(softmax_layer)); |
| | | layer->batch = batch; |
| | | layer->groups = groups; |
| | | layer->inputs = inputs; |
| | | layer->output = calloc(inputs*batch, sizeof(float)); |
| | | layer->delta = calloc(inputs*batch, sizeof(float)); |
| | | layer->jacobian = calloc(inputs*inputs*batch, sizeof(float)); |
| | | #ifdef GPU |
| | | layer->output_gpu = cuda_make_array(layer->output, inputs*batch); |
| | | layer->delta_gpu = cuda_make_array(layer->delta, inputs*batch); |
| | |
| | | return layer; |
| | | } |
| | | |
| | | void softmax_array(float *input, int n, float *output) |
| | | { |
| | | int i; |
| | | float sum = 0; |
| | | float largest = -FLT_MAX; |
| | | for(i = 0; i < n; ++i){ |
| | | if(input[i] > largest) largest = input[i]; |
| | | } |
| | | for(i = 0; i < n; ++i){ |
| | | sum += exp(input[i]-largest); |
| | | } |
| | | if(sum) sum = largest+log(sum); |
| | | else sum = largest-100; |
| | | for(i = 0; i < n; ++i){ |
| | | output[i] = exp(input[i]-sum); |
| | | } |
| | | } |
| | | |
| | | void forward_softmax_layer(const softmax_layer layer, float *input) |
| | | { |
| | | int i,b; |
| | | for(b = 0; b < layer.batch; ++b){ |
| | | float sum = 0; |
| | | float largest = -FLT_MAX; |
| | | for(i = 0; i < layer.inputs; ++i){ |
| | | if(input[i+b*layer.inputs] > largest) largest = input[i+b*layer.inputs]; |
| | | } |
| | | for(i = 0; i < layer.inputs; ++i){ |
| | | sum += exp(input[i+b*layer.inputs]-largest); |
| | | } |
| | | if(sum) sum = largest+log(sum); |
| | | else sum = largest-100; |
| | | for(i = 0; i < layer.inputs; ++i){ |
| | | layer.output[i+b*layer.inputs] = exp(input[i+b*layer.inputs]-sum); |
| | | } |
| | | int b; |
| | | int inputs = layer.inputs / layer.groups; |
| | | int batch = layer.batch * layer.groups; |
| | | for(b = 0; b < batch; ++b){ |
| | | softmax_array(input+b*inputs, inputs, layer.output+b*inputs); |
| | | } |
| | | } |
| | | |
| | |
| | | typedef struct { |
| | | int inputs; |
| | | int batch; |
| | | int groups; |
| | | float *delta; |
| | | float *output; |
| | | float *jacobian; |
| | | #ifdef GPU |
| | | float * delta_gpu; |
| | | float * output_gpu; |
| | | #endif |
| | | } softmax_layer; |
| | | |
| | | softmax_layer *make_softmax_layer(int batch, int inputs); |
| | | softmax_layer *make_softmax_layer(int batch, int groups, int inputs); |
| | | void forward_softmax_layer(const softmax_layer layer, float *input); |
| | | void backward_softmax_layer(const softmax_layer layer, float *delta); |
| | | |
| | |
| | | |
| | | extern "C" void forward_softmax_layer_gpu(const softmax_layer layer, float *input) |
| | | { |
| | | forward_softmax_layer_kernel<<<cuda_gridsize(layer.batch), BLOCK>>>(layer.inputs, layer.batch, input, layer.output_gpu); |
| | | int inputs = layer.inputs / layer.groups; |
| | | int batch = layer.batch * layer.groups; |
| | | forward_softmax_layer_kernel<<<cuda_gridsize(batch), BLOCK>>>(inputs, batch, input, layer.output_gpu); |
| | | check_error(cudaPeekAtLastError()); |
| | | |
| | | /* |
| | |
| | | |
| | | #include "utils.h" |
| | | |
| | | |
/*
 * Map an alphanumeric character to its class index:
 * '0'..'9' -> 0..9, 'a'..'z' -> 10..35. Uppercase letters are folded to
 * lowercase first, so 'A'..'Z' also map to 10..35 rather than to negative
 * values. Other characters are not validated; the caller must range-check
 * the result before using it as an index.
 */
int alphanum_to_int(char c)
{
    if (c >= 'A' && c <= 'Z') c = (char)(c - 'A' + 'a');
    return (c <= '9') ? c - '0' : c - 'a' + 10;
}
/*
 * Map a class index back to its character: 0..9 -> '0'..'9',
 * 10..35 -> 'a'..'z', and 36 -> '.'.
 */
char int_to_alphanum(int i)
{
    if (i == 36) {
        return '.';
    }
    if (i < 10) {
        return (char)(i + '0');
    }
    return (char)(i + 'a' - 10);
}
| | | |
| | | void pm(int M, int N, float *A) |
| | | { |
| | | int i,j; |
| | |
| | | #include <time.h> |
| | | #include "list.h" |
| | | |
| | | int alphanum_to_int(char c); |
| | | char int_to_alphanum(int i); |
| | | void read_all(int fd, char *buffer, size_t bytes); |
| | | void write_all(int fd, char *buffer, size_t bytes); |
| | | char *find_replace(char *str, char *orig, char *rep); |