16 files modified
1 files renamed
| File was renamed from cfg/yolo-tiny.cfg |
| | |
| | | [net] |
| | | batch=64 |
| | | subdivisions=64 |
| | | subdivisions=2 |
| | | height=448 |
| | | width=448 |
| | | channels=3 |
| | | momentum=0.9 |
| | | decay=0.0005 |
| | | |
| | | learning_rate=0.0001 |
| | | saturation=.75 |
| | | exposure=.75 |
| | | hue = .1 |
| | | |
| | | learning_rate=0.0005 |
| | | policy=steps |
| | | steps=20,40,60,80,20000,30000 |
| | | scales=5,5,2,2,.1,.1 |
| | | steps=200,400,600,800,20000,30000 |
| | | scales=2.5,2,2,2,.1,.1 |
| | | max_batches = 40000 |
| | | |
| | | [crop] |
| | | crop_width=448 |
| | | crop_height=448 |
| | | flip=0 |
| | | angle=0 |
| | | saturation = 1.5 |
| | | exposure = 1.5 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=16 |
| | | size=3 |
| | | stride=1 |
| | |
| | | stride=2 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=32 |
| | | size=3 |
| | | stride=1 |
| | |
| | | stride=2 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=64 |
| | | size=3 |
| | | stride=1 |
| | |
| | | stride=2 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=128 |
| | | size=3 |
| | | stride=1 |
| | |
| | | stride=2 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=256 |
| | | size=3 |
| | | stride=1 |
| | |
| | | stride=2 |
| | | |
| | | [convolutional] |
| | | batch_normalize=1 |
| | | filters=512 |
| | | size=3 |
| | | stride=1 |
| | |
| | | stride=2 |
| | | |
| | | [convolutional] |
| | | filters=1024 |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=1024 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | filters=1024 |
| | | batch_normalize=1 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | filters=256 |
| | | activation=leaky |
| | | |
| | | [convolutional] |
| | | filters=1024 |
| | | size=3 |
| | | stride=1 |
| | | pad=1 |
| | | activation=leaky |
| | | |
| | | [connected] |
| | | output=256 |
| | | activation=linear |
| | | |
| | | [connected] |
| | | output=4096 |
| | | activation=leaky |
| | | |
| | | [dropout] |
| | | probability=.5 |
| | | |
| | | [connected] |
| | | output= 1470 |
| | | activation=linear |
| | |
| | | __device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;} |
| | | __device__ float relu_activate_kernel(float x){return x*(x>0);} |
| | | __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} |
| | | __device__ float relie_activate_kernel(float x){return x*(x>0);} |
| | | __device__ float relie_activate_kernel(float x){return (x>0) ? x : .01*x;} |
| | | __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;} |
| | | __device__ float leaky_activate_kernel(float x){return (x>0) ? x : .1*x;} |
| | | __device__ float tanh_activate_kernel(float x){return (2/(1 + exp(-2*x)) - 1);} |
| | |
| | | static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;} |
| | | static inline float relu_activate(float x){return x*(x>0);} |
| | | static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);} |
| | | static inline float relie_activate(float x){return x*(x>0);} |
| | | static inline float relie_activate(float x){return (x>0) ? x : .01*x;} |
| | | static inline float ramp_activate(float x){return x*(x>0)+.1*x;} |
| | | static inline float leaky_activate(float x){return (x>0) ? x : .1*x;} |
| | | static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);} |
| | |
| | | args.min = net.min_crop; |
| | | args.max = net.max_crop; |
| | | args.angle = net.angle; |
| | | args.aspect = net.aspect; |
| | | args.exposure = net.exposure; |
| | | args.saturation = net.saturation; |
| | | args.hue = net.hue; |
| | |
| | | { |
| | | int i, j; |
| | | for(i = 0; i < l.outputs; ++i){ |
| | | float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .00001); |
| | | float scale = l.scales[i]/sqrt(l.rolling_variance[i] + .000001); |
| | | for(j = 0; j < l.inputs; ++j){ |
| | | l.weights[i*l.inputs + j] *= scale; |
| | | } |
| | |
| | | } |
| | | } |
| | | |
| | | |
| | | /* |
| | |  * Print summary statistics for the parameters of a connected layer. |
| | |  * |
| | |  * When l.batch_normalize is set, the scales, rolling mean and rolling |
| | |  * variance (l.outputs entries each) are reported first. Biases and weights |
| | |  * are always reported. Each line is produced by print_statistics. |
| | |  */ |
| | | void statistics_connected_layer(layer l) |
| | | { |
| | |     if(l.batch_normalize){ |
| | |         printf("Scales "); |
| | |         print_statistics(l.scales, l.outputs); |
| | |         printf("Rolling Mean "); |
| | |         print_statistics(l.rolling_mean, l.outputs); |
| | |         printf("Rolling Variance "); |
| | |         print_statistics(l.rolling_variance, l.outputs); |
| | |     } |
| | |     printf("Biases "); |
| | |     print_statistics(l.biases, l.outputs); |
| | |     printf("Weights "); |
| | |     /* The weight array is indexed as weights[i*l.inputs + j] with |
| | |      * i < l.outputs and j < l.inputs, so it holds l.outputs*l.inputs |
| | |      * values; passing only l.outputs would summarize a small prefix. */ |
| | |     print_statistics(l.weights, l.outputs*l.inputs); |
| | | } |
| | | |
| | | #ifdef GPU |
| | | |
| | | void pull_connected_layer(connected_layer l) |
| | |
| | | void backward_connected_layer(connected_layer layer, network_state state); |
| | | void update_connected_layer(connected_layer layer, int batch, float learning_rate, float momentum, float decay); |
| | | void denormalize_connected_layer(layer l); |
| | | void statistics_connected_layer(layer l); |
| | | |
| | | #ifdef GPU |
| | | void forward_connected_layer_gpu(connected_layer layer, network_state state); |
| | |
| | | save_weights(net, outfile); |
| | | } |
| | | |
| | | /* |
| | |  * Parse the network in cfgfile, load weightfile when one is given, and |
| | |  * print parameter statistics for every batch-normalized CONNECTED layer |
| | |  * and for the six connected sub-layers of every batch-normalized GRU layer. |
| | |  * A blank line is printed after each layer, whether or not it was reported. |
| | |  */ |
| | | void statistics_net(char *cfgfile, char *weightfile) |
| | | { |
| | |     /* NOTE(review): presumably forces the CPU code path - confirm. */ |
| | |     gpu_index = -1; |
| | |     network net = parse_network_cfg(cfgfile); |
| | |     if (weightfile) load_weights(&net, weightfile); |
| | | |
| | |     int idx = 0; |
| | |     while (idx < net.n) { |
| | |         layer cur = net.layers[idx]; |
| | |         /* Only batch-normalized layers are reported. */ |
| | |         if (cur.batch_normalize) { |
| | |             if (cur.type == CONNECTED) { |
| | |                 printf("Connected Layer %d\n", idx); |
| | |                 statistics_connected_layer(cur); |
| | |             } |
| | |             if (cur.type == GRU) { |
| | |                 /* Sub-layers in the order they are printed. */ |
| | |                 const char *labels[6] = { |
| | |                     "Input Z\n", "Input R\n", "Input H\n", |
| | |                     "State Z\n", "State R\n", "State H\n" |
| | |                 }; |
| | |                 layer *gates[6] = { |
| | |                     cur.input_z_layer, cur.input_r_layer, cur.input_h_layer, |
| | |                     cur.state_z_layer, cur.state_r_layer, cur.state_h_layer |
| | |                 }; |
| | |                 int g; |
| | |                 printf("GRU Layer %d\n", idx); |
| | |                 for (g = 0; g < 6; ++g) { |
| | |                     printf("%s", labels[g]); |
| | |                     statistics_connected_layer(*gates[g]); |
| | |                 } |
| | |             } |
| | |         } |
| | |         printf("\n"); |
| | |         ++idx; |
| | |     } |
| | | } |
| | | |
| | | void denormalize_net(char *cfgfile, char *weightfile, char *outfile) |
| | | { |
| | | gpu_index = -1; |
| | |
| | | reset_normalize_net(argv[2], argv[3], argv[4]); |
| | | } else if (0 == strcmp(argv[1], "denormalize")){ |
| | | denormalize_net(argv[2], argv[3], argv[4]); |
| | | } else if (0 == strcmp(argv[1], "statistics")){ |
| | | statistics_net(argv[2], argv[3]); |
| | | } else if (0 == strcmp(argv[1], "normalize")){ |
| | | normalize_net(argv[2], argv[3], argv[4]); |
| | | } else if (0 == strcmp(argv[1], "rescale")){ |
| | |
| | | return X; |
| | | } |
| | | |
| | | matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure) |
| | | matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) |
| | | { |
| | | int i; |
| | | matrix X; |
| | |
| | | |
| | | for(i = 0; i < n; ++i){ |
| | | image im = load_image_color(paths[i], 0, 0); |
| | | image crop = random_augment_image(im, angle, min, max, size); |
| | | image crop = random_augment_image(im, angle, aspect, min, max, size); |
| | | int flip = rand_r(&data_seed)%2; |
| | | if (flip) flip_image(crop); |
| | | random_distort_image(crop, hue, saturation, exposure); |
| | |
| | | load_args a = *(struct load_args*)ptr; |
| | | if(a.exposure == 0) a.exposure = 1; |
| | | if(a.saturation == 0) a.saturation = 1; |
| | | if(a.aspect == 0) a.aspect = 1; |
| | | |
| | | if (a.type == OLD_CLASSIFICATION_DATA){ |
| | | *a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); |
| | | } else if (a.type == CLASSIFICATION_DATA){ |
| | | *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure); |
| | | *a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); |
| | | } else if (a.type == SUPER_DATA){ |
| | | *a.d = load_data_super(a.paths, a.n, a.m, a.w, a.h, a.scale); |
| | | } else if (a.type == STUDY_DATA){ |
| | | *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure); |
| | | *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); |
| | | } else if (a.type == WRITING_DATA){ |
| | | *a.d = load_data_writing(a.paths, a.n, a.m, a.w, a.h, a.out_w, a.out_h); |
| | | } else if (a.type == REGION_DATA){ |
| | |
| | | *(a.im) = load_image_color(a.path, 0, 0); |
| | | *(a.resized) = resize_image(*(a.im), a.w, a.h); |
| | | } else if (a.type == TAG_DATA){ |
| | | *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.hue, a.saturation, a.exposure); |
| | | *a.d = load_data_tag(a.paths, a.n, a.m, a.classes, a.min, a.max, a.size, a.angle, a.aspect, a.hue, a.saturation, a.exposure); |
| | | //*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h); |
| | | } |
| | | free(ptr); |
| | |
| | | return d; |
| | | } |
| | | |
| | | data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure) |
| | | data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) |
| | | { |
| | | data d = {0}; |
| | | d.indexes = calloc(n, sizeof(int)); |
| | | if(m) paths = get_random_paths_indexes(paths, n, m, d.indexes); |
| | | d.shallow = 0; |
| | | d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure); |
| | | d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure); |
| | | d.y = load_labels_paths(paths, n, labels, k); |
| | | if(m) free(paths); |
| | | return d; |
| | |
| | | return d; |
| | | } |
| | | |
| | | data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure) |
| | | data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) |
| | | { |
| | | if(m) paths = get_random_paths(paths, n, m); |
| | | data d = {0}; |
| | | d.shallow = 0; |
| | | d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure); |
| | | d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure); |
| | | d.y = load_labels_paths(paths, n, labels, k); |
| | | if(m) free(paths); |
| | | return d; |
| | | } |
| | | |
| | | data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure) |
| | | data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure) |
| | | { |
| | | if(m) paths = get_random_paths(paths, n, m); |
| | | data d = {0}; |
| | | d.w = size; |
| | | d.h = size; |
| | | d.shallow = 0; |
| | | d.X = load_image_augment_paths(paths, n, min, max, size, angle, hue, saturation, exposure); |
| | | d.X = load_image_augment_paths(paths, n, min, max, size, angle, aspect, hue, saturation, exposure); |
| | | d.y = load_tags_paths(paths, n, k); |
| | | if(m) free(paths); |
| | | return d; |
| | |
| | | int scale; |
| | | float jitter; |
| | | float angle; |
| | | float aspect; |
| | | float saturation; |
| | | float exposure; |
| | | float hue; |
| | |
| | | data load_data_captcha_encode(char **paths, int n, int m, int w, int h); |
| | | data load_data(char **paths, int n, int m, char **labels, int k, int w, int h); |
| | | data load_data_detection(int n, char **paths, int m, int w, int h, int boxes, int classes, float jitter, float hue, float saturation, float exposure); |
| | | data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure); |
| | | matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float hue, float saturation, float exposure); |
| | | data load_data_tag(char **paths, int n, int m, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); |
| | | matrix load_image_augment_paths(char **paths, int n, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); |
| | | data load_data_super(char **paths, int n, int m, int w, int h, int scale); |
| | | data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure); |
| | | data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float hue, float saturation, float exposure); |
| | | data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); |
| | | data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size, float angle, float aspect, float hue, float saturation, float exposure); |
| | | data load_go(char *filename); |
| | | |
| | | box_label *read_boxes(char *filename, int *n); |
| | |
| | | int box_index = index * (classes + 5); |
| | | boxes[index].x = (predictions[box_index + 0] + col + .5) / side * w; |
| | | boxes[index].y = (predictions[box_index + 1] + row + .5) / side * h; |
| | | if(1){ |
| | | boxes[index].x = (logistic_activate(predictions[box_index + 0]) + col) / side * w; |
| | | boxes[index].y = (logistic_activate(predictions[box_index + 1]) + row) / side * h; |
| | | } |
| | | boxes[index].w = pow(logistic_activate(predictions[box_index + 2]), (square?2:1)) * w; |
| | | boxes[index].h = pow(logistic_activate(predictions[box_index + 3]), (square?2:1)) * h; |
| | | for(j = 0; j < classes; ++j){ |
| | |
| | | free_image(val_resized[t]); |
| | | } |
| | | } |
| | | for(j = 0; j < classes; ++j){ |
| | | fclose(fps[j]); |
| | | } |
| | | fprintf(stderr, "Total Detection Time: %f Seconds\n", (double)(time(0) - start)); |
| | | } |
| | | |
| | |
| | | return out; |
| | | } |
| | | |
| | | image rotate_crop_image(image im, float rad, float s, int w, int h, int dx, int dy) |
| | | |
| | | image rotate_crop_image(image im, float rad, float s, int w, int h, float dx, float dy, float aspect) |
| | | { |
| | | int x, y, c; |
| | | float cx = im.w/2.; |
| | |
| | | for(c = 0; c < im.c; ++c){ |
| | | for(y = 0; y < h; ++y){ |
| | | for(x = 0; x < w; ++x){ |
| | | float rx = cos(rad)*(x/s + dx/s -cx) - sin(rad)*(y/s + dy/s -cy) + cx; |
| | | float ry = sin(rad)*(x/s + dx/s -cx) + cos(rad)*(y/s + dy/s -cy) + cy; |
| | | float rx = cos(rad)*((x - w/2.)/s*aspect + dx/s*aspect) - sin(rad)*((y - h/2.)/s + dy/s) + cx; |
| | | float ry = sin(rad)*((x - w/2.)/s*aspect + dx/s*aspect) + cos(rad)*((y - h/2.)/s + dy/s) + cy; |
| | | float val = bilinear_interpolate(im, rx, ry, c); |
| | | set_pixel(rot, x, y, c, val); |
| | | } |
| | |
| | | return crop; |
| | | } |
| | | |
| | | image random_augment_image(image im, float angle, int low, int high, int size) |
| | | image random_augment_image(image im, float angle, float aspect, int low, int high, int size) |
| | | { |
| | | aspect = rand_scale(aspect); |
| | | int r = rand_int(low, high); |
| | | int min = (im.h < im.w) ? im.h : im.w; |
| | | int min = (im.h < im.w*aspect) ? im.h : im.w*aspect; |
| | | float scale = (float)r / min; |
| | | |
| | | float rad = rand_uniform(-angle, angle) * TWO_PI / 360.; |
| | | int dx = rand_int(0, scale * im.w - size); |
| | | int dy = rand_int(0, scale * im.h - size); |
| | | //printf("%d %d\n", dx, dy); |
| | | |
| | | image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy); |
| | | float dx = (im.w*scale/aspect - size) / 2.; |
| | | float dy = (im.h*scale - size) / 2.; |
| | | if(dx < 0) dx = 0; |
| | | if(dy < 0) dy = 0; |
| | | dx = rand_uniform(-dx, dx); |
| | | dy = rand_uniform(-dy, dy); |
| | | |
| | | image crop = rotate_crop_image(im, rad, scale, size, size, dx, dy, aspect); |
| | | |
| | | return crop; |
| | | } |
| | |
| | | show_image(c4, "C4"); |
| | | #ifdef OPENCV |
| | | while(1){ |
| | | image aug = random_augment_image(im, 0, 320, 448, 320, .75); |
| | | show_image(aug, "aug"); |
| | | free_image(aug); |
| | | |
| | | |
| | | float exposure = 1.15; |
| | | float saturation = 1.15; |
| | | float hue = .05; |
| | |
| | | void scale_image(image m, float s); |
| | | image crop_image(image im, int dx, int dy, int w, int h); |
| | | image random_crop_image(image im, int w, int h); |
| | | image random_augment_image(image im, float angle, int low, int high, int size); |
| | | image random_augment_image(image im, float angle, float aspect, int low, int high, int size); |
| | | void random_distort_image(image im, float hue, float saturation, float exposure); |
| | | image resize_image(image im, int w, int h); |
| | | image resize_min(image im, int min); |
| | |
| | | int max_crop; |
| | | int min_crop; |
| | | float angle; |
| | | float aspect; |
| | | float exposure; |
| | | float saturation; |
| | | float hue; |
| | |
| | | net->min_crop = option_find_int_quiet(options, "min_crop",net->w); |
| | | |
| | | net->angle = option_find_float_quiet(options, "angle", 0); |
| | | net->aspect = option_find_float_quiet(options, "aspect", 1); |
| | | net->saturation = option_find_float_quiet(options, "saturation", 1); |
| | | net->exposure = option_find_float_quiet(options, "exposure", 1); |
| | | net->hue = option_find_float_quiet(options, "hue", 0); |
| | |
| | | b.w = logistic_activate(x[index + 2]); |
| | | b.h = logistic_activate(x[index + 3]); |
| | | } |
| | | //if(adjust && b.w < .01) b.w = .01; |
| | | //if(adjust && b.h < .01) b.h = .01; |
| | | if(adjust && b.w < .01) b.w = .01; |
| | | if(adjust && b.h < .01) b.h = .01; |
| | | return b; |
| | | } |
| | | |
| | |
| | | l.delta[index + 4] = l.noobject_scale * ((0 - l.output[index + 4]) * logistic_gradient(l.output[index + 4])); |
| | | if(best_iou > .5) l.delta[index + 4] = 0; |
| | | |
| | | /* |
| | | if(*(state.net.seen) < 6400){ |
| | | box truth = {0}; |
| | | truth.x = (i + .5)/l.w; |
| | |
| | | truth.h = .5; |
| | | delta_region_box(truth, l.output, index, i, j, l.w, l.h, l.delta, LOG, 1); |
| | | } |
| | | */ |
| | | } |
| | | } |
| | | } |
| | |
| | | } |
| | | } |
| | | |
| | | /* |
| | |  * Print one line with the results of mse_array, mean_array and |
| | |  * variance_array for the array a of length n, each with six decimals. |
| | |  */ |
| | | void print_statistics(float *a, int n) |
| | | { |
| | |     float avg = mean_array(a, n); |
| | |     float var = variance_array(a, n); |
| | |     printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", |
| | |            mse_array(a, n), avg, var); |
| | | } |
| | | |
| | | float variance_array(float *a, int n) |
| | | { |
| | | int i; |
| | |
| | | int find_arg(int argc, char* argv[], char *arg); |
| | | char *find_char_arg(int argc, char **argv, char *arg, char *def); |
| | | int sample_array(float *a, int n); |
| | | void print_statistics(float *a, int n); |
| | | |
| | | #endif |
| | | |