From 1b5afb45838e603fa6780762eb8cc59246dc2d81 Mon Sep 17 00:00:00 2001
From: IlyaOvodov <b@ovdv.ru>
Date: Tue, 08 May 2018 11:09:35 +0000
Subject: [PATCH] Output improvements for detector results: When printing detector results, output was produced in random order, making the results hard to interpret. Now: 1. Text output includes the coordinates of each rect (left, right, top, bottom, in pixels) along with its label and score. 2. Text output is sorted by the rects' left edges to simplify finding the corresponding rects in the image. 3. If several class probs are > thresh for a detection, the most probable class is written first and the coordinates are not repeated for the others. 4. Rects are drawn onto the image in order of their best class prob, so the most probable rects are always on top and are not overlaid by less probable ones. 5. The most probable label for a rect is always written first. Also: 6. The message about low GPU memory includes the required amount.

---
 src/parser.c | 1642 ++++++++++++++++++++++++++++++++++++----------------------
 1 files changed, 1,014 insertions(+), 628 deletions(-)

diff --git a/src/parser.c b/src/parser.c
index 3f94c80..7441ae2 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -2,39 +2,82 @@
 #include <string.h>
 #include <stdlib.h>
 
-#include "parser.h"
+#include "activation_layer.h"
 #include "activations.h"
-#include "crop_layer.h"
-#include "cost_layer.h"
-#include "convolutional_layer.h"
-#include "deconvolutional_layer.h"
+#include "assert.h"
+#include "avgpool_layer.h"
+#include "batchnorm_layer.h"
+#include "blas.h"
 #include "connected_layer.h"
+#include "convolutional_layer.h"
+#include "cost_layer.h"
+#include "crnn_layer.h"
+#include "crop_layer.h"
+#include "detection_layer.h"
+#include "dropout_layer.h"
+#include "gru_layer.h"
+#include "list.h"
+#include "local_layer.h"
 #include "maxpool_layer.h"
 #include "normalization_layer.h"
-#include "softmax_layer.h"
-#include "dropout_layer.h"
-#include "freeweight_layer.h"
-#include "list.h"
 #include "option_list.h"
+#include "parser.h"
+#include "region_layer.h"
+#include "reorg_layer.h"
+#include "reorg_old_layer.h"
+#include "rnn_layer.h"
+#include "route_layer.h"
+#include "shortcut_layer.h"
+#include "softmax_layer.h"
 #include "utils.h"
+#include "upsample_layer.h"
+#include "yolo_layer.h"
+#include <stdint.h>
 
 typedef struct{
     char *type;
     list *options;
 }section;
 
-int is_convolutional(section *s);
-int is_deconvolutional(section *s);
-int is_connected(section *s);
-int is_maxpool(section *s);
-int is_dropout(section *s);
-int is_freeweight(section *s);
-int is_softmax(section *s);
-int is_crop(section *s);
-int is_cost(section *s);
-int is_normalization(section *s);
 list *read_cfg(char *filename);
 
+LAYER_TYPE string_to_layer_type(char * type)
+{
+
+    if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
+    if (strcmp(type, "[crop]")==0) return CROP;
+    if (strcmp(type, "[cost]")==0) return COST;
+    if (strcmp(type, "[detection]")==0) return DETECTION;
+    if (strcmp(type, "[region]")==0) return REGION;
+	if (strcmp(type, "[yolo]") == 0) return YOLO;
+    if (strcmp(type, "[local]")==0) return LOCAL;
+    if (strcmp(type, "[conv]")==0
+            || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
+    if (strcmp(type, "[activation]")==0) return ACTIVE;
+    if (strcmp(type, "[net]")==0
+            || strcmp(type, "[network]")==0) return NETWORK;
+    if (strcmp(type, "[crnn]")==0) return CRNN;
+    if (strcmp(type, "[gru]")==0) return GRU;
+    if (strcmp(type, "[rnn]")==0) return RNN;
+    if (strcmp(type, "[conn]")==0
+            || strcmp(type, "[connected]")==0) return CONNECTED;
+    if (strcmp(type, "[max]")==0
+            || strcmp(type, "[maxpool]")==0) return MAXPOOL;
+    if (strcmp(type, "[reorg]")==0) return REORG;
+	if (strcmp(type, "[reorg_old]") == 0) return REORG_OLD;
+    if (strcmp(type, "[avg]")==0
+            || strcmp(type, "[avgpool]")==0) return AVGPOOL;
+    if (strcmp(type, "[dropout]")==0) return DROPOUT;
+    if (strcmp(type, "[lrn]")==0
+            || strcmp(type, "[normalization]")==0) return NORMALIZATION;
+    if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
+    if (strcmp(type, "[soft]")==0
+            || strcmp(type, "[softmax]")==0) return SOFTMAX;
+    if (strcmp(type, "[route]")==0) return ROUTE;
+	if (strcmp(type, "[upsample]") == 0) return UPSAMPLE;
+    return BLANK;
+}
+
 void free_section(section *s)
 {
     free(s->type);
@@ -67,405 +110,712 @@
     }
 }
 
-deconvolutional_layer *parse_deconvolutional(list *options, network *net, int count)
-{
-    int h,w,c;
-    float learning_rate, momentum, decay;
-    int n = option_find_int(options, "filters",1);
-    int size = option_find_int(options, "size",1);
-    int stride = option_find_int(options, "stride",1);
-    char *activation_s = option_find_str(options, "activation", "sigmoid");
-    ACTIVATION activation = get_activation(activation_s);
-    if(count == 0){
-        learning_rate = option_find_float(options, "learning_rate", .001);
-        momentum = option_find_float(options, "momentum", .9);
-        decay = option_find_float(options, "decay", .0001);
-        h = option_find_int(options, "height",1);
-        w = option_find_int(options, "width",1);
-        c = option_find_int(options, "channels",1);
-        net->batch = option_find_int(options, "batch",1);
-        net->learning_rate = learning_rate;
-        net->momentum = momentum;
-        net->decay = decay;
-        net->seen = option_find_int(options, "seen",0);
-    }else{
-        learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate);
-        momentum = option_find_float_quiet(options, "momentum", net->momentum);
-        decay = option_find_float_quiet(options, "decay", net->decay);
-        image m =  get_network_image_layer(*net, count-1);
-        h = m.h;
-        w = m.w;
-        c = m.c;
-        if(h == 0) error("Layer before deconvolutional layer must output image.");
-    }
-    deconvolutional_layer *layer = make_deconvolutional_layer(net->batch,h,w,c,n,size,stride,activation,learning_rate,momentum,decay);
-    char *weights = option_find_str(options, "weights", 0);
-    char *biases = option_find_str(options, "biases", 0);
-    parse_data(weights, layer->filters, c*n*size*size);
-    parse_data(biases, layer->biases, n);
-    #ifdef GPU
-    if(weights || biases) push_deconvolutional_layer(*layer);
-    #endif
-    option_unused(options);
-    return layer;
-}
+typedef struct size_params{
+    int batch;
+    int inputs;
+    int h;
+    int w;
+    int c;
+    int index;
+    int time_steps;
+    network net;
+} size_params;
 
-convolutional_layer *parse_convolutional(list *options, network *net, int count)
+local_layer parse_local(list *options, size_params params)
 {
-    int h,w,c;
-    float learning_rate, momentum, decay;
     int n = option_find_int(options, "filters",1);
     int size = option_find_int(options, "size",1);
     int stride = option_find_int(options, "stride",1);
     int pad = option_find_int(options, "pad",0);
-    char *activation_s = option_find_str(options, "activation", "sigmoid");
+    char *activation_s = option_find_str(options, "activation", "logistic");
     ACTIVATION activation = get_activation(activation_s);
-    if(count == 0){
-        learning_rate = option_find_float(options, "learning_rate", .001);
-        momentum = option_find_float(options, "momentum", .9);
-        decay = option_find_float(options, "decay", .0001);
-        h = option_find_int(options, "height",1);
-        w = option_find_int(options, "width",1);
-        c = option_find_int(options, "channels",1);
-        net->batch = option_find_int(options, "batch",1);
-        net->learning_rate = learning_rate;
-        net->momentum = momentum;
-        net->decay = decay;
-        net->seen = option_find_int(options, "seen",0);
-    }else{
-        learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate);
-        momentum = option_find_float_quiet(options, "momentum", net->momentum);
-        decay = option_find_float_quiet(options, "decay", net->decay);
-        image m =  get_network_image_layer(*net, count-1);
-        h = m.h;
-        w = m.w;
-        c = m.c;
-        if(h == 0) error("Layer before convolutional layer must output image.");
-    }
-    convolutional_layer *layer = make_convolutional_layer(net->batch,h,w,c,n,size,stride,pad,activation,learning_rate,momentum,decay);
-    char *weights = option_find_str(options, "weights", 0);
-    char *biases = option_find_str(options, "biases", 0);
-    parse_data(weights, layer->filters, c*n*size*size);
-    parse_data(biases, layer->biases, n);
-    #ifdef GPU
-    if(weights || biases) push_convolutional_layer(*layer);
-    #endif
-    option_unused(options);
+
+    int batch,h,w,c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before local layer must output image.");
+
+    local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation);
+
     return layer;
 }
 
-connected_layer *parse_connected(list *options, network *net, int count)
+convolutional_layer parse_convolutional(list *options, size_params params)
 {
-    int input;
-    float learning_rate, momentum, decay;
+    int n = option_find_int(options, "filters",1);
+    int size = option_find_int(options, "size",1);
+    int stride = option_find_int(options, "stride",1);
+    int pad = option_find_int_quiet(options, "pad",0);
+    int padding = option_find_int_quiet(options, "padding",0);
+    if(pad) padding = size/2;
+
+    char *activation_s = option_find_str(options, "activation", "logistic");
+    ACTIVATION activation = get_activation(activation_s);
+
+    int batch,h,w,c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before convolutional layer must output image.");
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+    int binary = option_find_int_quiet(options, "binary", 0);
+    int xnor = option_find_int_quiet(options, "xnor", 0);
+
+    convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,padding,activation, batch_normalize, binary, xnor, params.net.adam);
+    layer.flipped = option_find_int_quiet(options, "flipped", 0);
+    layer.dot = option_find_float_quiet(options, "dot", 0);
+    if(params.net.adam){
+        layer.B1 = params.net.B1;
+        layer.B2 = params.net.B2;
+        layer.eps = params.net.eps;
+    }
+
+    return layer;
+}
+
+layer parse_crnn(list *options, size_params params)
+{
+    int output_filters = option_find_int(options, "output_filters",1);
+    int hidden_filters = option_find_int(options, "hidden_filters",1);
+    char *activation_s = option_find_str(options, "activation", "logistic");
+    ACTIVATION activation = get_activation(activation_s);
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+
+    layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize);
+
+    l.shortcut = option_find_int_quiet(options, "shortcut", 0);
+
+    return l;
+}
+
+layer parse_rnn(list *options, size_params params)
+{
     int output = option_find_int(options, "output",1);
-    char *activation_s = option_find_str(options, "activation", "sigmoid");
+    int hidden = option_find_int(options, "hidden",1);
+    char *activation_s = option_find_str(options, "activation", "logistic");
     ACTIVATION activation = get_activation(activation_s);
-    if(count == 0){
-        input = option_find_int(options, "input",1);
-        net->batch = option_find_int(options, "batch",1);
-        learning_rate = option_find_float(options, "learning_rate", .001);
-        momentum = option_find_float(options, "momentum", .9);
-        decay = option_find_float(options, "decay", .0001);
-        net->learning_rate = learning_rate;
-        net->momentum = momentum;
-        net->decay = decay;
-    }else{
-        learning_rate = option_find_float_quiet(options, "learning_rate", net->learning_rate);
-        momentum = option_find_float_quiet(options, "momentum", net->momentum);
-        decay = option_find_float_quiet(options, "decay", net->decay);
-        input =  get_network_output_size_layer(*net, count-1);
-    }
-    connected_layer *layer = make_connected_layer(net->batch, input, output, activation,learning_rate,momentum,decay);
-    char *weights = option_find_str(options, "weights", 0);
-    char *biases = option_find_str(options, "biases", 0);
-    parse_data(biases, layer->biases, output);
-    parse_data(weights, layer->weights, input*output);
-    #ifdef GPU
-    if(weights || biases) push_connected_layer(*layer);
-    #endif
-    option_unused(options);
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+    int logistic = option_find_int_quiet(options, "logistic", 0);
+
+    layer l = make_rnn_layer(params.batch, params.inputs, hidden, output, params.time_steps, activation, batch_normalize, logistic);
+
+    l.shortcut = option_find_int_quiet(options, "shortcut", 0);
+
+    return l;
+}
+
+layer parse_gru(list *options, size_params params)
+{
+    int output = option_find_int(options, "output",1);
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+
+    layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize);
+
+    return l;
+}
+
+connected_layer parse_connected(list *options, size_params params)
+{
+    int output = option_find_int(options, "output",1);
+    char *activation_s = option_find_str(options, "activation", "logistic");
+    ACTIVATION activation = get_activation(activation_s);
+    int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+
+    connected_layer layer = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize);
+
     return layer;
 }
 
-softmax_layer *parse_softmax(list *options, network *net, int count)
+softmax_layer parse_softmax(list *options, size_params params)
 {
-    int input;
-    if(count == 0){
-        input = option_find_int(options, "input",1);
-        net->batch = option_find_int(options, "batch",1);
-        net->seen = option_find_int(options, "seen",0);
-    }else{
-        input =  get_network_output_size_layer(*net, count-1);
-    }
-    softmax_layer *layer = make_softmax_layer(net->batch, input);
-    option_unused(options);
+    int groups = option_find_int_quiet(options, "groups",1);
+    softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups);
+    layer.temperature = option_find_float_quiet(options, "temperature", 1);
+    char *tree_file = option_find_str(options, "tree", 0);
+    if (tree_file) layer.softmax_tree = read_tree(tree_file);
     return layer;
 }
 
-cost_layer *parse_cost(list *options, network *net, int count)
+int *parse_yolo_mask(char *a, int *num)
 {
-    int input;
-    if(count == 0){
-        input = option_find_int(options, "input",1);
-        net->batch = option_find_int(options, "batch",1);
-        net->seen = option_find_int(options, "seen",0);
-    }else{
-        input =  get_network_output_size_layer(*net, count-1);
+	int *mask = 0;
+	if (a) {
+		int len = strlen(a);
+		int n = 1;
+		int i;
+		for (i = 0; i < len; ++i) {
+			if (a[i] == ',') ++n;
+		}
+		mask = calloc(n, sizeof(int));
+		for (i = 0; i < n; ++i) {
+			int val = atoi(a);
+			mask[i] = val;
+			a = strchr(a, ',') + 1;
+		}
+		*num = n;
+	}
+	return mask;
+}
+
+layer parse_yolo(list *options, size_params params)
+{
+	int classes = option_find_int(options, "classes", 20);
+	int total = option_find_int(options, "num", 1);
+	int num = total;
+
+	char *a = option_find_str(options, "mask", 0);
+	int *mask = parse_yolo_mask(a, &num);
+	int max_boxes = option_find_int_quiet(options, "max", 30);
+	layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes);
+	assert(l.outputs == params.inputs);
+
+	//l.max_boxes = option_find_int_quiet(options, "max", 90);
+	l.jitter = option_find_float(options, "jitter", .2);
+	l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
+
+	l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
+	l.truth_thresh = option_find_float(options, "truth_thresh", 1);
+	l.random = option_find_int_quiet(options, "random", 0);
+
+	char *map_file = option_find_str(options, "map", 0);
+	if (map_file) l.map = read_map(map_file);
+
+	a = option_find_str(options, "anchors", 0);
+	if (a) {
+		int len = strlen(a);
+		int n = 1;
+		int i;
+		for (i = 0; i < len; ++i) {
+			if (a[i] == ',') ++n;
+		}
+		for (i = 0; i < n && i < total*2; ++i) {
+			float bias = atof(a);
+			l.biases[i] = bias;
+			a = strchr(a, ',') + 1;
+		}
+	}
+	return l;
+}
+
+layer parse_region(list *options, size_params params)
+{
+    int coords = option_find_int(options, "coords", 4);
+    int classes = option_find_int(options, "classes", 20);
+    int num = option_find_int(options, "num", 1);
+	int max_boxes = option_find_int_quiet(options, "max", 30);
+
+    layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes);
+    assert(l.outputs == params.inputs);
+
+    l.log = option_find_int_quiet(options, "log", 0);
+    l.sqrt = option_find_int_quiet(options, "sqrt", 0);
+
+    l.softmax = option_find_int(options, "softmax", 0);
+	l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
+    //l.max_boxes = option_find_int_quiet(options, "max",30);
+    l.jitter = option_find_float(options, "jitter", .2);
+    l.rescore = option_find_int_quiet(options, "rescore",0);
+
+    l.thresh = option_find_float(options, "thresh", .5);
+    l.classfix = option_find_int_quiet(options, "classfix", 0);
+    l.absolute = option_find_int_quiet(options, "absolute", 0);
+    l.random = option_find_int_quiet(options, "random", 0);
+
+    l.coord_scale = option_find_float(options, "coord_scale", 1);
+    l.object_scale = option_find_float(options, "object_scale", 1);
+    l.noobject_scale = option_find_float(options, "noobject_scale", 1);
+	l.mask_scale = option_find_float(options, "mask_scale", 1);
+    l.class_scale = option_find_float(options, "class_scale", 1);
+    l.bias_match = option_find_int_quiet(options, "bias_match",0);
+
+    char *tree_file = option_find_str(options, "tree", 0);
+    if (tree_file) l.softmax_tree = read_tree(tree_file);
+    char *map_file = option_find_str(options, "map", 0);
+    if (map_file) l.map = read_map(map_file);
+
+    char *a = option_find_str(options, "anchors", 0);
+    if(a){
+        int len = strlen(a);
+        int n = 1;
+        int i;
+        for(i = 0; i < len; ++i){
+            if (a[i] == ',') ++n;
+        }
+        for(i = 0; i < n && i < num*2; ++i){
+            float bias = atof(a);
+            l.biases[i] = bias;
+            a = strchr(a, ',')+1;
+        }
     }
+    return l;
+}
+detection_layer parse_detection(list *options, size_params params)
+{
+    int coords = option_find_int(options, "coords", 1);
+    int classes = option_find_int(options, "classes", 1);
+    int rescore = option_find_int(options, "rescore", 0);
+    int num = option_find_int(options, "num", 1);
+    int side = option_find_int(options, "side", 7);
+    detection_layer layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore);
+
+    layer.softmax = option_find_int(options, "softmax", 0);
+    layer.sqrt = option_find_int(options, "sqrt", 0);
+
+    layer.max_boxes = option_find_int_quiet(options, "max",30);
+    layer.coord_scale = option_find_float(options, "coord_scale", 1);
+    layer.forced = option_find_int(options, "forced", 0);
+    layer.object_scale = option_find_float(options, "object_scale", 1);
+    layer.noobject_scale = option_find_float(options, "noobject_scale", 1);
+    layer.class_scale = option_find_float(options, "class_scale", 1);
+    layer.jitter = option_find_float(options, "jitter", .2);
+    layer.random = option_find_int_quiet(options, "random", 0);
+    layer.reorg = option_find_int_quiet(options, "reorg", 0);
+    return layer;
+}
+
+cost_layer parse_cost(list *options, size_params params)
+{
     char *type_s = option_find_str(options, "type", "sse");
     COST_TYPE type = get_cost_type(type_s);
-    cost_layer *layer = make_cost_layer(net->batch, input, type);
-    option_unused(options);
+    float scale = option_find_float_quiet(options, "scale",1);
+    cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
+    layer.ratio =  option_find_float_quiet(options, "ratio",0);
     return layer;
 }
 
-crop_layer *parse_crop(list *options, network *net, int count)
+crop_layer parse_crop(list *options, size_params params)
 {
-    float learning_rate, momentum, decay;
-    int h,w,c;
     int crop_height = option_find_int(options, "crop_height",1);
     int crop_width = option_find_int(options, "crop_width",1);
     int flip = option_find_int(options, "flip",0);
-    if(count == 0){
-        h = option_find_int(options, "height",1);
-        w = option_find_int(options, "width",1);
-        c = option_find_int(options, "channels",1);
-        net->batch = option_find_int(options, "batch",1);
-        learning_rate = option_find_float(options, "learning_rate", .001);
-        momentum = option_find_float(options, "momentum", .9);
-        decay = option_find_float(options, "decay", .0001);
-        net->learning_rate = learning_rate;
-        net->momentum = momentum;
-        net->decay = decay;
-        net->seen = option_find_int(options, "seen",0);
-    }else{
-        image m =  get_network_image_layer(*net, count-1);
-        h = m.h;
-        w = m.w;
-        c = m.c;
-        if(h == 0) error("Layer before crop layer must output image.");
-    }
-    crop_layer *layer = make_crop_layer(net->batch,h,w,c,crop_height,crop_width,flip);
-    option_unused(options);
+    float angle = option_find_float(options, "angle",0);
+    float saturation = option_find_float(options, "saturation",1);
+    float exposure = option_find_float(options, "exposure",1);
+
+    int batch,h,w,c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before crop layer must output image.");
+
+    int noadjust = option_find_int_quiet(options, "noadjust",0);
+
+    crop_layer l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure);
+    l.shift = option_find_float(options, "shift", 0);
+    l.noadjust = noadjust;
+    return l;
+}
+
+layer parse_reorg(list *options, size_params params)
+{
+    int stride = option_find_int(options, "stride",1);
+    int reverse = option_find_int_quiet(options, "reverse",0);
+
+    int batch,h,w,c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before reorg layer must output image.");
+
+    layer layer = make_reorg_layer(batch,w,h,c,stride,reverse);
     return layer;
 }
 
-maxpool_layer *parse_maxpool(list *options, network *net, int count)
+layer parse_reorg_old(list *options, size_params params)
 {
-    int h,w,c;
+	printf("\n reorg_old \n");
+	int stride = option_find_int(options, "stride", 1);
+	int reverse = option_find_int_quiet(options, "reverse", 0);
+
+	int batch, h, w, c;
+	h = params.h;
+	w = params.w;
+	c = params.c;
+	batch = params.batch;
+	if (!(h && w && c)) error("Layer before reorg layer must output image.");
+
+	layer layer = make_reorg_old_layer(batch, w, h, c, stride, reverse);
+	return layer;
+}
+
+maxpool_layer parse_maxpool(list *options, size_params params)
+{
     int stride = option_find_int(options, "stride",1);
     int size = option_find_int(options, "size",stride);
-    if(count == 0){
-        h = option_find_int(options, "height",1);
-        w = option_find_int(options, "width",1);
-        c = option_find_int(options, "channels",1);
-        net->batch = option_find_int(options, "batch",1);
-        net->seen = option_find_int(options, "seen",0);
-    }else{
-        image m =  get_network_image_layer(*net, count-1);
-        h = m.h;
-        w = m.w;
-        c = m.c;
-        if(h == 0) error("Layer before convolutional layer must output image.");
-    }
-    maxpool_layer *layer = make_maxpool_layer(net->batch,h,w,c,size,stride);
-    option_unused(options);
+    int padding = option_find_int_quiet(options, "padding", (size-1)/2);
+
+    int batch,h,w,c;
+    h = params.h;
+    w = params.w;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before maxpool layer must output image.");
+
+    maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding);
     return layer;
 }
 
-/*
-freeweight_layer *parse_freeweight(list *options, network *net, int count)
+avgpool_layer parse_avgpool(list *options, size_params params)
 {
-    int input;
-    if(count == 0){
-        net->batch = option_find_int(options, "batch",1);
-        input = option_find_int(options, "input",1);
-    }else{
-        input =  get_network_output_size_layer(*net, count-1);
-    }
-    freeweight_layer *layer = make_freeweight_layer(net->batch,input);
-    option_unused(options);
+    int batch,w,h,c;
+    w = params.w;
+    h = params.h;
+    c = params.c;
+    batch=params.batch;
+    if(!(h && w && c)) error("Layer before avgpool layer must output image.");
+
+    avgpool_layer layer = make_avgpool_layer(batch,w,h,c);
     return layer;
 }
-*/
 
-dropout_layer *parse_dropout(list *options, network *net, int count)
+dropout_layer parse_dropout(list *options, size_params params)
 {
-    int input;
     float probability = option_find_float(options, "probability", .5);
-    if(count == 0){
-        net->batch = option_find_int(options, "batch",1);
-        input = option_find_int(options, "input",1);
-        float learning_rate = option_find_float(options, "learning_rate", .001);
-        float momentum = option_find_float(options, "momentum", .9);
-        float decay = option_find_float(options, "decay", .0001);
-        net->learning_rate = learning_rate;
-        net->momentum = momentum;
-        net->decay = decay;
-        net->seen = option_find_int(options, "seen",0);
-    }else{
-        input =  get_network_output_size_layer(*net, count-1);
-    }
-    dropout_layer *layer = make_dropout_layer(net->batch,input,probability);
-    option_unused(options);
+    dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability);
+    layer.out_w = params.w;
+    layer.out_h = params.h;
+    layer.out_c = params.c;
     return layer;
 }
 
-normalization_layer *parse_normalization(list *options, network *net, int count)
+layer parse_normalization(list *options, size_params params)
 {
-    int h,w,c;
-    int size = option_find_int(options, "size",1);
-    float alpha = option_find_float(options, "alpha", 0.);
-    float beta = option_find_float(options, "beta", 1.);
-    float kappa = option_find_float(options, "kappa", 1.);
-    if(count == 0){
-        h = option_find_int(options, "height",1);
-        w = option_find_int(options, "width",1);
-        c = option_find_int(options, "channels",1);
-        net->batch = option_find_int(options, "batch",1);
-        net->seen = option_find_int(options, "seen",0);
-    }else{
-        image m =  get_network_image_layer(*net, count-1);
-        h = m.h;
-        w = m.w;
-        c = m.c;
-        if(h == 0) error("Layer before convolutional layer must output image.");
+    float alpha = option_find_float(options, "alpha", .0001);
+    float beta =  option_find_float(options, "beta" , .75);
+    float kappa = option_find_float(options, "kappa", 1);
+    int size = option_find_int(options, "size", 5);
+    layer l = make_normalization_layer(params.batch, params.w, params.h, params.c, size, alpha, beta, kappa);
+    return l;
+}
+
+layer parse_batchnorm(list *options, size_params params)
+{
+    layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c);
+    return l;
+}
+
+layer parse_shortcut(list *options, size_params params, network net)
+{
+    char *l = option_find(options, "from");   
+    int index = atoi(l);
+    if(index < 0) index = params.index + index;
+
+    int batch = params.batch;
+    layer from = net.layers[index];
+
+    layer s = make_shortcut_layer(batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);
+
+    char *activation_s = option_find_str(options, "activation", "linear");
+    ACTIVATION activation = get_activation(activation_s);
+    s.activation = activation;
+    return s;
+}
+
+
+layer parse_activation(list *options, size_params params)
+{
+    char *activation_s = option_find_str(options, "activation", "linear");
+    ACTIVATION activation = get_activation(activation_s);
+
+    layer l = make_activation_layer(params.batch, params.inputs, activation);
+
+    l.out_h = params.h;
+    l.out_w = params.w;
+    l.out_c = params.c;
+    l.h = params.h;
+    l.w = params.w;
+    l.c = params.c;
+
+    return l;
+}
+
+layer parse_upsample(list *options, size_params params, network net)
+{
+
+	int stride = option_find_int(options, "stride", 2);
+	layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride);
+	l.scale = option_find_float_quiet(options, "scale", 1);
+	return l;
+}
+
+route_layer parse_route(list *options, size_params params, network net)
+{
+    char *l = option_find(options, "layers");   
+    int len = strlen(l);
+    if(!l) error("Route Layer must specify input layers");
+    int n = 1;
+    int i;
+    for(i = 0; i < len; ++i){
+        if (l[i] == ',') ++n;
     }
-    normalization_layer *layer = make_normalization_layer(net->batch,h,w,c,size, alpha, beta, kappa);
-    option_unused(options);
+
+    int *layers = calloc(n, sizeof(int));
+    int *sizes = calloc(n, sizeof(int));
+    for(i = 0; i < n; ++i){
+        int index = atoi(l);
+        l = strchr(l, ',')+1;
+        if(index < 0) index = params.index + index;
+        layers[i] = index;
+        sizes[i] = net.layers[index].outputs;
+    }
+    int batch = params.batch;
+
+    route_layer layer = make_route_layer(batch, n, layers, sizes);
+
+    convolutional_layer first = net.layers[layers[0]];
+    layer.out_w = first.out_w;
+    layer.out_h = first.out_h;
+    layer.out_c = first.out_c;
+    for(i = 1; i < n; ++i){
+        int index = layers[i];
+        convolutional_layer next = net.layers[index];
+        if(next.out_w == first.out_w && next.out_h == first.out_h){
+            layer.out_c += next.out_c;
+        }else{
+            layer.out_h = layer.out_w = layer.out_c = 0;
+        }
+    }
+
     return layer;
 }
 
+learning_rate_policy get_policy(char *s)
+{
+    if (strcmp(s, "random")==0) return RANDOM;
+    if (strcmp(s, "poly")==0) return POLY;
+    if (strcmp(s, "constant")==0) return CONSTANT;
+    if (strcmp(s, "step")==0) return STEP;
+    if (strcmp(s, "exp")==0) return EXP;
+    if (strcmp(s, "sigmoid")==0) return SIG;
+    if (strcmp(s, "steps")==0) return STEPS;
+    fprintf(stderr, "Couldn't find policy %s, going with constant\n", s);
+    return CONSTANT;
+}
+
+// Populate *net's training and input settings from the options of the [net] section.
+// With policy=steps, "steps" and "scales" are comma-separated lists; every step needs a scale.
+void parse_net_options(list *options, network *net)
+{
+    net->batch = option_find_int(options, "batch",1);
+    net->learning_rate = option_find_float(options, "learning_rate", .001);
+    net->momentum = option_find_float(options, "momentum", .9);
+    net->decay = option_find_float(options, "decay", .0001);
+    int subdivs = option_find_int(options, "subdivisions",1);
+    if(subdivs == 0) error("subdivisions must not be 0");   // guards the division below
+    net->time_steps = option_find_int_quiet(options, "time_steps",1);
+    net->batch /= subdivs;
+    net->batch *= net->time_steps;
+    net->subdivisions = subdivs;
+
+    net->adam = option_find_int_quiet(options, "adam", 0);
+    if(net->adam){
+        net->B1 = option_find_float(options, "B1", .9);
+        net->B2 = option_find_float(options, "B2", .999);
+        net->eps = option_find_float(options, "eps", .000001);
+    }
+
+    net->h = option_find_int_quiet(options, "height",0);
+    net->w = option_find_int_quiet(options, "width",0);
+    net->c = option_find_int_quiet(options, "channels",0);
+    net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
+    net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
+    net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
+    net->flip = option_find_int_quiet(options, "flip", 1);
+
+    net->small_object = option_find_int_quiet(options, "small_object", 0);
+    net->angle = option_find_float_quiet(options, "angle", 0);
+    net->aspect = option_find_float_quiet(options, "aspect", 1);
+    net->saturation = option_find_float_quiet(options, "saturation", 1);
+    net->exposure = option_find_float_quiet(options, "exposure", 1);
+    net->hue = option_find_float_quiet(options, "hue", 0);
+    net->power = option_find_float_quiet(options, "power", 4);   // read for every policy, not only POLY/RANDOM
+
+    if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
+
+    char *policy_s = option_find_str(options, "policy", "constant");
+    net->policy = get_policy(policy_s);
+    net->burn_in = option_find_int_quiet(options, "burn_in", 0);
+#ifdef CUDNN_HALF
+    net->burn_in = 0;
+#endif
+    if(net->policy == STEP){
+        net->step = option_find_int(options, "step", 1);
+        net->scale = option_find_float(options, "scale", 1);
+    } else if (net->policy == STEPS){
+        char *l = option_find(options, "steps");
+        char *p = option_find(options, "scales");
+        if(!l || !p) error("STEPS policy must have steps and scales in cfg file");
+        int len = strlen(l);
+        int i, n = 1;                       // n = number of comma-separated entries in "steps"
+        for(i = 0; i < len; ++i){
+            if (l[i] == ',') ++n;
+        }
+        int *steps = calloc(n, sizeof(int));
+        float *scales = calloc(n, sizeof(float));
+        for(i = 0; i < n; ++i){
+            if(!p) error("STEPS policy needs a scale for every step in cfg file");   // scales list ran out early
+            steps[i] = atoi(l);
+            scales[i] = atof(p);
+            l = strchr(l, ',');             // NULL once the last entry has been consumed
+            p = strchr(p, ',');
+            if(l) ++l;                      // step past the comma without ever forming NULL+1
+            if(p) ++p;
+        }
+        net->scales = scales;
+        net->steps = steps;
+        net->num_steps = n;
+    } else if (net->policy == EXP){
+        net->gamma = option_find_float(options, "gamma", 1);
+    } else if (net->policy == SIG){
+        net->gamma = option_find_float(options, "gamma", 1);
+        net->step = option_find_int(options, "step", 1);
+    }
+    net->max_batches = option_find_int(options, "max_batches", 0);
+}
+
+// Nonzero when section s is the global network block, i.e. its type is "[net]" or "[network]".
+int is_network(section *s)
+{
+    return !strcmp(s->type, "[net]") || !strcmp(s->type, "[network]");
+}
+
 network parse_network_cfg(char *filename)
 {
-    list *sections = read_cfg(filename);
-    network net = make_network(sections->size, 0);
+	return parse_network_cfg_custom(filename, 0); // batch 0: keep the batch size the cfg specifies
+}
 
+network parse_network_cfg_custom(char *filename, int batch)
+{   // Build a network from the cfg file at filename; a positive batch overrides the cfg's batch size.
+    list *sections = read_cfg(filename);
     node *n = sections->front;
+    if(!n) error("Config file has no sections");
+    network net = make_network(sections->size - 1);   // every section except the leading [net] one becomes a layer
+    net.gpu_index = gpu_index;
+    size_params params;
+
+    section *s = (section *)n->val;
+    list *options = s->options;
+    if(!is_network(s)) error("First section must be [net] or [network]");
+    parse_net_options(options, &net);
+
+    params.h = net.h;
+    params.w = net.w;
+    params.c = net.c;
+    params.inputs = net.inputs;
+    if (batch > 0) net.batch = batch;
+    params.batch = net.batch;
+    params.time_steps = net.time_steps;
+    params.net = net;
+
+    size_t workspace_size = 0;
+    n = n->next;
     int count = 0;
+    free_section(s);
+    fprintf(stderr, "layer     filters    size              input                output\n");
     while(n){
-        section *s = (section *)n->val;
-        list *options = s->options;
-        if(is_convolutional(s)){
-            convolutional_layer *layer = parse_convolutional(options, &net, count);
-            net.types[count] = CONVOLUTIONAL;
-            net.layers[count] = layer;
-        }else if(is_deconvolutional(s)){
-            deconvolutional_layer *layer = parse_deconvolutional(options, &net, count);
-            net.types[count] = DECONVOLUTIONAL;
-            net.layers[count] = layer;
-        }else if(is_connected(s)){
-            connected_layer *layer = parse_connected(options, &net, count);
-            net.types[count] = CONNECTED;
-            net.layers[count] = layer;
-        }else if(is_crop(s)){
-            crop_layer *layer = parse_crop(options, &net, count);
-            net.types[count] = CROP;
-            net.layers[count] = layer;
-        }else if(is_cost(s)){
-            cost_layer *layer = parse_cost(options, &net, count);
-            net.types[count] = COST;
-            net.layers[count] = layer;
-        }else if(is_softmax(s)){
-            softmax_layer *layer = parse_softmax(options, &net, count);
-            net.types[count] = SOFTMAX;
-            net.layers[count] = layer;
-        }else if(is_maxpool(s)){
-            maxpool_layer *layer = parse_maxpool(options, &net, count);
-            net.types[count] = MAXPOOL;
-            net.layers[count] = layer;
-        }else if(is_normalization(s)){
-            normalization_layer *layer = parse_normalization(options, &net, count);
-            net.types[count] = NORMALIZATION;
-            net.layers[count] = layer;
-        }else if(is_dropout(s)){
-            dropout_layer *layer = parse_dropout(options, &net, count);
-            net.types[count] = DROPOUT;
-            net.layers[count] = layer;
-        }else if(is_freeweight(s)){
-            //freeweight_layer *layer = parse_freeweight(options, &net, count);
-            //net.types[count] = FREEWEIGHT;
-            //net.layers[count] = layer;
-            fprintf(stderr, "Type not recognized: %s\n", s->type);
+        params.index = count;
+        fprintf(stderr, "%5d ", count);
+        s = (section *)n->val;
+        options = s->options;
+        layer l = {0};
+        LAYER_TYPE lt = string_to_layer_type(s->type);
+        if(lt == CONVOLUTIONAL){
+            l = parse_convolutional(options, params);
+        }else if(lt == LOCAL){
+            l = parse_local(options, params);
+        }else if(lt == ACTIVE){
+            l = parse_activation(options, params);
+        }else if(lt == RNN){
+            l = parse_rnn(options, params);
+        }else if(lt == GRU){
+            l = parse_gru(options, params);
+        }else if(lt == CRNN){
+            l = parse_crnn(options, params);
+        }else if(lt == CONNECTED){
+            l = parse_connected(options, params);
+        }else if(lt == CROP){
+            l = parse_crop(options, params);
+        }else if(lt == COST){
+            l = parse_cost(options, params);
+        }else if(lt == REGION){
+            l = parse_region(options, params);
+        }else if(lt == YOLO){
+            l = parse_yolo(options, params);
+        }else if(lt == DETECTION){
+            l = parse_detection(options, params);
+        }else if(lt == SOFTMAX){
+            l = parse_softmax(options, params);
+            net.hierarchy = l.softmax_tree;
+        }else if(lt == NORMALIZATION){
+            l = parse_normalization(options, params);
+        }else if(lt == BATCHNORM){
+            l = parse_batchnorm(options, params);
+        }else if(lt == MAXPOOL){
+            l = parse_maxpool(options, params);
+        }else if(lt == REORG){
+            l = parse_reorg(options, params);
+        }else if(lt == REORG_OLD){
+            l = parse_reorg_old(options, params);
+        }else if(lt == AVGPOOL){
+            l = parse_avgpool(options, params);
+        }else if(lt == ROUTE){
+            l = parse_route(options, params, net);
+        }else if(lt == UPSAMPLE){
+            l = parse_upsample(options, params, net);
+        }else if(lt == SHORTCUT){
+            l = parse_shortcut(options, params, net);
+        }else if(lt == DROPOUT){
+            if(count == 0) error("Dropout cannot be the first layer");   // it reuses layer count-1's buffers below
+            l = parse_dropout(options, params);
+            l.output = net.layers[count-1].output;
+            l.delta = net.layers[count-1].delta;
+#ifdef GPU
+            l.output_gpu = net.layers[count-1].output_gpu;
+            l.delta_gpu = net.layers[count-1].delta_gpu;
+#endif
         }else{
             fprintf(stderr, "Type not recognized: %s\n", s->type);
         }
+        l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
+        l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
+        l.dontload = option_find_int_quiet(options, "dontload", 0);
+        l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
+        option_unused(options);
+        net.layers[count] = l;
+        if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;   // track the largest request
         free_section(s);
-        ++count;
         n = n->next;
+        ++count;
+        if(n){   // the next layer's input geometry is this layer's output geometry
+            params.h = l.out_h;
+            params.w = l.out_w;
+            params.c = l.out_c;
+            params.inputs = l.outputs;
+        }
     }   
     free_list(sections);
     net.outputs = get_network_output_size(net);
     net.output = get_network_output(net);
+    if(workspace_size){
+#ifdef GPU
+        if(gpu_index >= 0){
+            net.workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);   // byte size rounded up to whole floats
+        }else {
+            net.workspace = calloc(1, workspace_size);
+        }
+#else
+        net.workspace = calloc(1, workspace_size);
+#endif
+    }
     return net;
 }
 
-int is_crop(section *s)
-{
-    return (strcmp(s->type, "[crop]")==0);
-}
-int is_cost(section *s)
-{
-    return (strcmp(s->type, "[cost]")==0);
-}
-int is_deconvolutional(section *s)
-{
-    return (strcmp(s->type, "[deconv]")==0
-            || strcmp(s->type, "[deconvolutional]")==0);
-}
-int is_convolutional(section *s)
-{
-    return (strcmp(s->type, "[conv]")==0
-            || strcmp(s->type, "[convolutional]")==0);
-}
-int is_connected(section *s)
-{
-    return (strcmp(s->type, "[conn]")==0
-            || strcmp(s->type, "[connected]")==0);
-}
-int is_maxpool(section *s)
-{
-    return (strcmp(s->type, "[max]")==0
-            || strcmp(s->type, "[maxpool]")==0);
-}
-int is_dropout(section *s)
-{
-    return (strcmp(s->type, "[dropout]")==0);
-}
-int is_freeweight(section *s)
-{
-    return (strcmp(s->type, "[freeweight]")==0);
-}
 
-int is_softmax(section *s)
-{
-    return (strcmp(s->type, "[soft]")==0
-            || strcmp(s->type, "[softmax]")==0);
-}
-int is_normalization(section *s)
-{
-    return (strcmp(s->type, "[lrnorm]")==0
-            || strcmp(s->type, "[localresponsenormalization]")==0);
-}
-
-int read_option(char *s, list *options)
-{
-    size_t i;
-    size_t len = strlen(s);
-    char *val = 0;
-    for(i = 0; i < len; ++i){
-        if(s[i] == '='){
-            s[i] = '\0';
-            val = s+i+1;
-            break;
-        }
-    }
-    if(i == len-1) return 0;
-    char *key = s;
-    option_insert(options, key, val);
-    return 1;
-}
 
 list *read_cfg(char *filename)
 {
@@ -502,321 +852,357 @@
     return sections;
 }
 
-void print_convolutional_cfg(FILE *fp, convolutional_layer *l, network net, int count)
+void save_convolutional_weights_binary(layer l, FILE *fp) // writes biases, optional batch-norm stats, then per filter one float plus packed sign bits
 {
-    #ifdef GPU
-    if(gpu_index >= 0)  pull_convolutional_layer(*l);
-    #endif
-    int i;
-    fprintf(fp, "[convolutional]\n");
-    if(count == 0) {
-        fprintf(fp,   "batch=%d\n"
-                "height=%d\n"
-                "width=%d\n"
-                "channels=%d\n"
-                "learning_rate=%g\n"
-                "momentum=%g\n"
-                "decay=%g\n"
-                "seen=%d\n",
-                l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay, net.seen);
-    } else {
-        if(l->learning_rate != net.learning_rate)
-            fprintf(fp, "learning_rate=%g\n", l->learning_rate);
-        if(l->momentum != net.momentum)
-            fprintf(fp, "momentum=%g\n", l->momentum);
-        if(l->decay != net.decay)
-            fprintf(fp, "decay=%g\n", l->decay);
+#ifdef GPU
+    if(gpu_index >= 0){
+        pull_convolutional_layer(l);
     }
-    fprintf(fp, "filters=%d\n"
-            "size=%d\n"
-            "stride=%d\n"
-            "pad=%d\n"
-            "activation=%s\n",
-            l->n, l->size, l->stride, l->pad,
-            get_activation_string(l->activation));
-    fprintf(fp, "biases=");
-    for(i = 0; i < l->n; ++i) fprintf(fp, "%g,", l->biases[i]);
-    fprintf(fp, "\n");
-    fprintf(fp, "weights=");
-    for(i = 0; i < l->n*l->c*l->size*l->size; ++i) fprintf(fp, "%g,", l->filters[i]);
-    fprintf(fp, "\n\n");
-}
-
-void print_deconvolutional_cfg(FILE *fp, deconvolutional_layer *l, network net, int count)
-{
-    #ifdef GPU
-    if(gpu_index >= 0)  pull_deconvolutional_layer(*l);
-    #endif
-    int i;
-    fprintf(fp, "[deconvolutional]\n");
-    if(count == 0) {
-        fprintf(fp,   "batch=%d\n"
-                "height=%d\n"
-                "width=%d\n"
-                "channels=%d\n"
-                "learning_rate=%g\n"
-                "momentum=%g\n"
-                "decay=%g\n"
-                "seen=%d\n",
-                l->batch,l->h, l->w, l->c, l->learning_rate, l->momentum, l->decay, net.seen);
-    } else {
-        if(l->learning_rate != net.learning_rate)
-            fprintf(fp, "learning_rate=%g\n", l->learning_rate);
-        if(l->momentum != net.momentum)
-            fprintf(fp, "momentum=%g\n", l->momentum);
-        if(l->decay != net.decay)
-            fprintf(fp, "decay=%g\n", l->decay);
+#endif
+    binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights); // presumably fills l.binary_weights from l.weights; helper is defined elsewhere
+    int size = l.c*l.size*l.size; // number of weights in one filter
+    int i, j, k;
+    fwrite(l.biases, sizeof(float), l.n, fp);
+    if (l.batch_normalize){
+        fwrite(l.scales, sizeof(float), l.n, fp);
+        fwrite(l.rolling_mean, sizeof(float), l.n, fp);
+        fwrite(l.rolling_variance, sizeof(float), l.n, fp);
     }
-    fprintf(fp, "filters=%d\n"
-            "size=%d\n"
-            "stride=%d\n"
-            "activation=%s\n",
-            l->n, l->size, l->stride,
-            get_activation_string(l->activation));
-    fprintf(fp, "biases=");
-    for(i = 0; i < l->n; ++i) fprintf(fp, "%g,", l->biases[i]);
-    fprintf(fp, "\n");
-    fprintf(fp, "weights=");
-    for(i = 0; i < l->n*l->c*l->size*l->size; ++i) fprintf(fp, "%g,", l->filters[i]);
-    fprintf(fp, "\n\n");
-}
-
-void print_freeweight_cfg(FILE *fp, freeweight_layer *l, network net, int count)
-{
-    fprintf(fp, "[freeweight]\n");
-    if(count == 0){
-        fprintf(fp, "batch=%d\ninput=%d\n",l->batch, l->inputs);
+    for(i = 0; i < l.n; ++i){
+        float mean = l.binary_weights[i*size]; // first binarized weight of filter i ...
+        if(mean < 0) mean = -mean;             // ... stored as a magnitude, once per filter
+        fwrite(&mean, sizeof(float), 1, fp);
+        for(j = 0; j < size/8; ++j){ // NOTE(review): size/8 truncates, so the last size%8 weights are never written and the bounds check below cannot fire; the binary loader uses the same bound
+            int index = i*size + j*8;
+            unsigned char c = 0;
+            for(k = 0; k < 8; ++k){
+                if (j*8 + k >= size) break;
+                if (l.binary_weights[index + k] > 0) c = (c | 1<<k); // bit k is set when weight index+k is positive
+            }
+            fwrite(&c, sizeof(char), 1, fp);
+        }
     }
-    fprintf(fp, "\n");
 }
 
-void print_dropout_cfg(FILE *fp, dropout_layer *l, network net, int count)
+void save_convolutional_weights(layer l, FILE *fp) // writes biases, optional batch-norm stats, weights and (with adam) m/v as raw floats
 {
-    fprintf(fp, "[dropout]\n");
-    if(count == 0){
-        fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs);
+    if(l.binary){ // binary path is disabled (calls commented out): binary layers are saved as plain floats
+        //save_convolutional_weights_binary(l, fp);
+        //return;
     }
-    fprintf(fp, "probability=%g\n\n", l->probability);
-}
-
-void print_connected_cfg(FILE *fp, connected_layer *l, network net, int count)
-{
-    #ifdef GPU
-    if(gpu_index >= 0) pull_connected_layer(*l);
-    #endif
-    int i;
-    fprintf(fp, "[connected]\n");
-    if(count == 0){
-        fprintf(fp, "batch=%d\n"
-                "input=%d\n"
-                "learning_rate=%g\n"
-                "momentum=%g\n"
-                "decay=%g\n"
-                "seen=%d\n",
-                l->batch, l->inputs, l->learning_rate, l->momentum, l->decay, net.seen);
-    } else {
-        if(l->learning_rate != net.learning_rate)
-            fprintf(fp, "learning_rate=%g\n", l->learning_rate);
-        if(l->momentum != net.momentum)
-            fprintf(fp, "momentum=%g\n", l->momentum);
-        if(l->decay != net.decay)
-            fprintf(fp, "decay=%g\n", l->decay);
+#ifdef GPU
+    if(gpu_index >= 0){
+        pull_convolutional_layer(l);
     }
-    fprintf(fp, "output=%d\n"
-            "activation=%s\n",
-            l->outputs,
-            get_activation_string(l->activation));
-    fprintf(fp, "biases=");
-    for(i = 0; i < l->outputs; ++i) fprintf(fp, "%g,", l->biases[i]);
-    fprintf(fp, "\n");
-    fprintf(fp, "weights=");
-    for(i = 0; i < l->outputs*l->inputs; ++i) fprintf(fp, "%g,", l->weights[i]);
-    fprintf(fp, "\n\n");
-}
-
-void print_crop_cfg(FILE *fp, crop_layer *l, network net, int count)
-{
-    fprintf(fp, "[crop]\n");
-    if(count == 0) {
-        fprintf(fp,   "batch=%d\n"
-                "height=%d\n"
-                "width=%d\n"
-                "channels=%d\n"
-                "learning_rate=%g\n"
-                "momentum=%g\n"
-                "decay=%g\n"
-                "seen=%d\n",
-                l->batch,l->h, l->w, l->c, net.learning_rate, net.momentum, net.decay, net.seen);
+#endif
+    int num = l.n*l.c*l.size*l.size; // total weight count across all l.n filters
+    fwrite(l.biases, sizeof(float), l.n, fp);
+    if (l.batch_normalize){ // written regardless of dontloadscales; only the loader consults that flag
+        fwrite(l.scales, sizeof(float), l.n, fp);
+        fwrite(l.rolling_mean, sizeof(float), l.n, fp);
+        fwrite(l.rolling_variance, sizeof(float), l.n, fp);
     }
-    fprintf(fp, "crop_height=%d\ncrop_width=%d\nflip=%d\n\n", l->crop_height, l->crop_width, l->flip);
+    fwrite(l.weights, sizeof(float), num, fp);
+    if(l.adam){ // m and v are written at the same length as the weights
+        fwrite(l.m, sizeof(float), num, fp);
+        fwrite(l.v, sizeof(float), num, fp);
+    }
 }
 
-void print_maxpool_cfg(FILE *fp, maxpool_layer *l, network net, int count)
+void save_batchnorm_weights(layer l, FILE *fp) // writes l.c floats each of scales, rolling mean and rolling variance, in that order
 {
-    fprintf(fp, "[maxpool]\n");
-    if(count == 0) fprintf(fp,   "batch=%d\n"
-            "height=%d\n"
-            "width=%d\n"
-            "channels=%d\n",
-            l->batch,l->h, l->w, l->c);
-    fprintf(fp, "size=%d\nstride=%d\n\n", l->size, l->stride);
+#ifdef GPU
+    if(gpu_index >= 0){
+        pull_batchnorm_layer(l);
+    }
+#endif
+    fwrite(l.scales, sizeof(float), l.c, fp);
+    fwrite(l.rolling_mean, sizeof(float), l.c, fp);
+    fwrite(l.rolling_variance, sizeof(float), l.c, fp);
 }
 
-void print_normalization_cfg(FILE *fp, normalization_layer *l, network net, int count)
+void save_connected_weights(layer l, FILE *fp) // writes biases, weights, then batch-norm stats when l.batch_normalize is set
 {
-    fprintf(fp, "[localresponsenormalization]\n");
-    if(count == 0) fprintf(fp,   "batch=%d\n"
-            "height=%d\n"
-            "width=%d\n"
-            "channels=%d\n",
-            l->batch,l->h, l->w, l->c);
-    fprintf(fp, "size=%d\n"
-            "alpha=%g\n"
-            "beta=%g\n"
-            "kappa=%g\n\n", l->size, l->alpha, l->beta, l->kappa);
+#ifdef GPU
+    if(gpu_index >= 0){
+        pull_connected_layer(l);
+    }
+#endif
+    fwrite(l.biases, sizeof(float), l.outputs, fp);
+    fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp);
+    if (l.batch_normalize){ // unlike the convolutional format, the stats follow the weights here
+        fwrite(l.scales, sizeof(float), l.outputs, fp);
+        fwrite(l.rolling_mean, sizeof(float), l.outputs, fp);
+        fwrite(l.rolling_variance, sizeof(float), l.outputs, fp);
+    }
 }
 
-void print_softmax_cfg(FILE *fp, softmax_layer *l, network net, int count)
+void save_weights_upto(network net, char *filename, int cutoff) // saves a version header, seen, and the weights of layers 0..min(net.n, cutoff)-1
 {
-    fprintf(fp, "[softmax]\n");
-    if(count == 0) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs);
-    fprintf(fp, "\n");
-}
-
-void print_cost_cfg(FILE *fp, cost_layer *l, network net, int count)
-{
-    fprintf(fp, "[cost]\ntype=%s\n", get_cost_string(l->type));
-    if(count == 0) fprintf(fp, "batch=%d\ninput=%d\n", l->batch, l->inputs);
-    fprintf(fp, "\n");
-}
-
-void save_weights(network net, char *filename)
-{
+#ifdef GPU
+    if(net.gpu_index >= 0){
+        cuda_set_device(net.gpu_index);
+    }
+#endif
     fprintf(stderr, "Saving weights to %s\n", filename);
-    FILE *fp = fopen(filename, "w");
+    FILE *fp = fopen(filename, "wb"); // binary mode: the file holds raw ints and floats
     if(!fp) file_error(filename);
 
-    fwrite(&net.learning_rate, sizeof(float), 1, fp);
-    fwrite(&net.momentum, sizeof(float), 1, fp);
-    fwrite(&net.decay, sizeof(float), 1, fp);
-    fwrite(&net.seen, sizeof(int), 1, fp);
+    int major = 0; // header version 0.1.0; load_weights_upto reads an int-sized seen for versions below 0.2
+    int minor = 1;
+    int revision = 0;
+    fwrite(&major, sizeof(int), 1, fp);
+    fwrite(&minor, sizeof(int), 1, fp);
+    fwrite(&revision, sizeof(int), 1, fp);
+    fwrite(net.seen, sizeof(int), 1, fp); // NOTE(review): writes sizeof(int) bytes from *net.seen; confirm against seen's declared type
 
     int i;
-    for(i = 0; i < net.n; ++i){
-        if(net.types[i] == CONVOLUTIONAL){
-            convolutional_layer layer = *(convolutional_layer *) net.layers[i];
-            #ifdef GPU
+    for(i = 0; i < net.n && i < cutoff; ++i){
+        layer l = net.layers[i];
+        if(l.type == CONVOLUTIONAL){
+            save_convolutional_weights(l, fp);
+        } if(l.type == CONNECTED){ // plain if, not else-if: l.type holds one value, so at most one branch runs
+            save_connected_weights(l, fp);
+        } if(l.type == BATCHNORM){
+            save_batchnorm_weights(l, fp);
+        } if(l.type == RNN){ // sub-layer order must match load_weights_upto
+            save_connected_weights(*(l.input_layer), fp);
+            save_connected_weights(*(l.self_layer), fp);
+            save_connected_weights(*(l.output_layer), fp);
+        } if(l.type == GRU){
+            save_connected_weights(*(l.input_z_layer), fp);
+            save_connected_weights(*(l.input_r_layer), fp);
+            save_connected_weights(*(l.input_h_layer), fp);
+            save_connected_weights(*(l.state_z_layer), fp);
+            save_connected_weights(*(l.state_r_layer), fp);
+            save_connected_weights(*(l.state_h_layer), fp);
+        } if(l.type == CRNN){
+            save_convolutional_weights(*(l.input_layer), fp);
+            save_convolutional_weights(*(l.self_layer), fp);
+            save_convolutional_weights(*(l.output_layer), fp);
+        } if(l.type == LOCAL){
+#ifdef GPU
             if(gpu_index >= 0){
-                pull_convolutional_layer(layer);
+                pull_local_layer(l);
             }
-            #endif
-            int num = layer.n*layer.c*layer.size*layer.size;
-            fwrite(layer.biases, sizeof(float), layer.n, fp);
-            fwrite(layer.filters, sizeof(float), num, fp);
-        }
-        if(net.types[i] == DECONVOLUTIONAL){
-            deconvolutional_layer layer = *(deconvolutional_layer *) net.layers[i];
-            #ifdef GPU
-            if(gpu_index >= 0){
-                pull_deconvolutional_layer(layer);
-            }
-            #endif
-            int num = layer.n*layer.c*layer.size*layer.size;
-            fwrite(layer.biases, sizeof(float), layer.n, fp);
-            fwrite(layer.filters, sizeof(float), num, fp);
-        }
-        if(net.types[i] == CONNECTED){
-            connected_layer layer = *(connected_layer *) net.layers[i];
-            #ifdef GPU
-            if(gpu_index >= 0){
-                pull_connected_layer(layer);
-            }
-            #endif
-            fwrite(layer.biases, sizeof(float), layer.outputs, fp);
-            fwrite(layer.weights, sizeof(float), layer.outputs*layer.inputs, fp);
+#endif
+            int locations = l.out_w*l.out_h;
+            int size = l.size*l.size*l.c*l.n*locations; // a separate set of filter weights per output location
+            fwrite(l.biases, sizeof(float), l.outputs, fp);
+            fwrite(l.weights, sizeof(float), size, fp);
         }
     }
     fclose(fp);
 }
+void save_weights(network net, char *filename)
+{
+    save_weights_upto(net, filename, net.n); // cutoff == net.n, so every layer is saved
+}
+
+void transpose_matrix(float *a, int rows, int cols)
+{   // Transposes the row-major rows x cols matrix `a` in place (it becomes cols x rows) via a scratch copy.
+    if(rows <= 0 || cols <= 0) return;      // nothing to move; also keeps a negative count away from calloc
+    size_t count = (size_t)rows*cols;       // size_t so rows*cols cannot overflow int
+    float *transpose = calloc(count, sizeof(float));
+    if(!transpose){ fprintf(stderr, "transpose_matrix: out of memory\n"); exit(EXIT_FAILURE); }
+    int x, y;
+    for(x = 0; x < rows; ++x)
+        for(y = 0; y < cols; ++y) transpose[(size_t)y*rows + x] = a[(size_t)x*cols + y];
+    memcpy(a, transpose, count*sizeof(float));
+    free(transpose);
+}
+
+// Read a connected layer's parameters from fp in the order save_connected_weights writes them:
+// biases, weights, then (batch-normalized layers only, unless dontloadscales is set) scales and rolling stats.
+void load_connected_weights(layer l, FILE *fp, int transpose)
+{
+    const int n_out = l.outputs;
+    const int n_weights = l.outputs*l.inputs;
+    fread(l.biases, sizeof(float), n_out, fp);
+    fread(l.weights, sizeof(float), n_weights, fp);
+    if(transpose){
+        transpose_matrix(l.weights, l.inputs, l.outputs);
+    }
+    const int wants_norm = l.batch_normalize && !l.dontloadscales;
+    if(wants_norm){
+        fread(l.scales, sizeof(float), n_out, fp);
+        fread(l.rolling_mean, sizeof(float), n_out, fp);
+        fread(l.rolling_variance, sizeof(float), n_out, fp);
+    }
+#ifdef GPU
+    if(gpu_index >= 0){
+        push_connected_layer(l);
+    }
+#endif
+}
+
+// Read l.c floats each into l.scales, l.rolling_mean and l.rolling_variance (in that order) from fp.
+void load_batchnorm_weights(layer l, FILE *fp)
+{
+    float *fields[3] = {l.scales, l.rolling_mean, l.rolling_variance};
+    int f;
+    for(f = 0; f < 3; ++f){
+        fread(fields[f], sizeof(float), l.c, fp);
+    }
+#ifdef GPU
+    if(gpu_index >= 0) push_batchnorm_layer(l);
+#endif
+}
+
+void load_convolutional_weights_binary(layer l, FILE *fp) // reads the layout save_convolutional_weights_binary writes: per filter one float plus packed sign bits
+{
+    fread(l.biases, sizeof(float), l.n, fp);
+    if (l.batch_normalize && (!l.dontloadscales)){
+        fread(l.scales, sizeof(float), l.n, fp);
+        fread(l.rolling_mean, sizeof(float), l.n, fp);
+        fread(l.rolling_variance, sizeof(float), l.n, fp);
+    }
+    int size = l.c*l.size*l.size; // number of weights in one filter
+    int i, j, k;
+    for(i = 0; i < l.n; ++i){
+        float mean = 0;
+        fread(&mean, sizeof(float), 1, fp); // per-filter magnitude applied to every weight of filter i
+        for(j = 0; j < size/8; ++j){ // NOTE(review): size/8 truncates, so the last size%8 weights of each filter are left untouched and the bounds check below cannot fire
+            int index = i*size + j*8;
+            unsigned char c = 0;
+            fread(&c, sizeof(char), 1, fp);
+            for(k = 0; k < 8; ++k){
+                if (j*8 + k >= size) break;
+                l.weights[index + k] = (c & 1<<k) ? mean : -mean; // bit k set -> +mean, clear -> -mean
+            }
+        }
+    }
+#ifdef GPU
+    if(gpu_index >= 0){
+        push_convolutional_layer(l);
+    }
+#endif
+}
+
+// Read one convolutional layer's parameters from fp, in this order:
+//   - l.n biases;
+//   - if l.batch_normalize is set and l.dontloadscales is not: l.n scales, l.n rolling
+//     means and l.n rolling variances;
+//   - l.n*l.c*l.size*l.size weights;
+//   - if l.adam is set: the m and v buffers, each the same length as the weights.
+// Weights of layers marked l.flipped are transposed after reading. With GPU support
+// compiled in and gpu_index >= 0, push_convolutional_layer(l) is called afterwards.
+// fread results are not checked, so a file that ends early leaves the unread part
+// of the buffers untouched.
+// The packed reader load_convolutional_weights_binary is not used here: layers with
+// l.binary set are read as plain floats like any other layer.
+void load_convolutional_weights(layer l, FILE *fp)
+{
+    int num = l.n*l.c*l.size*l.size;
+    fread(l.biases, sizeof(float), l.n, fp);
+    if (l.batch_normalize && (!l.dontloadscales)){
+        fread(l.scales, sizeof(float), l.n, fp);
+        fread(l.rolling_mean, sizeof(float), l.n, fp);
+        fread(l.rolling_variance, sizeof(float), l.n, fp);
+    }
+    fread(l.weights, sizeof(float), num, fp);
+    if(l.adam){
+        fread(l.m, sizeof(float), num, fp);
+        fread(l.v, sizeof(float), num, fp);
+    }
+    if (l.flipped) {
+        transpose_matrix(l.weights, l.c*l.size*l.size, l.n);
+    }
+
+#ifdef GPU
+    if(gpu_index >= 0){
+        push_convolutional_layer(l);
+    }
+#endif
+}
+
+
+
+
+// Load the weights of the first `cutoff` layers (at most net->n) of *net from filename.
+// Layers flagged dontload are skipped without consuming any bytes from the file.
+void load_weights_upto(network *net, char *filename, int cutoff)
+{
+#ifdef GPU
+    if(net->gpu_index >= 0){
+        cuda_set_device(net->gpu_index);
+    }
+#endif
+    fprintf(stderr, "Loading weights from %s...", filename);
+    fflush(stdout);
+    FILE *fp = fopen(filename, "rb");
+    if(!fp) file_error(filename);
+
+    int major = 0, minor = 0, revision = 0;   // zeroed so a short header read cannot leave them indeterminate
+    fread(&major, sizeof(int), 1, fp);
+    fread(&minor, sizeof(int), 1, fp);
+    fread(&revision, sizeof(int), 1, fp);
+    // Header versions 0.2 and later store "seen" as a uint64_t; older files store an int.
+    if ((major * 10 + minor) >= 2) {
+        printf("\n seen 64 \n");
+        uint64_t iseen = 0;
+        fread(&iseen, sizeof(uint64_t), 1, fp);
+        *net->seen = iseen;
+    } else {
+        printf("\n seen 32 \n");
+        fread(net->seen, sizeof(int), 1, fp);
+    }
+    int transpose = (major > 1000) || (minor > 1000);   // such version values make connected weights get transposed on load
+
+    int i;
+    for(i = 0; i < net->n && i < cutoff; ++i){
+        layer l = net->layers[i];
+        if (l.dontload) continue;
+        if(l.type == CONVOLUTIONAL){
+            load_convolutional_weights(l, fp);
+        }
+        if(l.type == CONNECTED){
+            load_connected_weights(l, fp, transpose);
+        }
+        if(l.type == BATCHNORM){
+            load_batchnorm_weights(l, fp);
+        }
+        if(l.type == CRNN){
+            load_convolutional_weights(*(l.input_layer), fp);
+            load_convolutional_weights(*(l.self_layer), fp);
+            load_convolutional_weights(*(l.output_layer), fp);
+        }
+        if(l.type == RNN){
+            load_connected_weights(*(l.input_layer), fp, transpose);
+            load_connected_weights(*(l.self_layer), fp, transpose);
+            load_connected_weights(*(l.output_layer), fp, transpose);
+        }
+        if(l.type == GRU){
+            load_connected_weights(*(l.input_z_layer), fp, transpose);
+            load_connected_weights(*(l.input_r_layer), fp, transpose);
+            load_connected_weights(*(l.input_h_layer), fp, transpose);
+            load_connected_weights(*(l.state_z_layer), fp, transpose);
+            load_connected_weights(*(l.state_r_layer), fp, transpose);
+            load_connected_weights(*(l.state_h_layer), fp, transpose);
+        }
+        if(l.type == LOCAL){
+            int locations = l.out_w*l.out_h;
+            int size = l.size*l.size*l.c*l.n*locations;
+            fread(l.biases, sizeof(float), l.outputs, fp);
+            fread(l.weights, sizeof(float), size, fp);
+#ifdef GPU
+            if(gpu_index >= 0){
+                push_local_layer(l);
+            }
+#endif
+        }
+    }
+    fprintf(stderr, "Done!\n");
+    fclose(fp);
+}
 
 void load_weights(network *net, char *filename)
 {
-    fprintf(stderr, "Loading weights from %s\n", filename);
-    FILE *fp = fopen(filename, "r");
-    if(!fp) file_error(filename);
-
-    fread(&net->learning_rate, sizeof(float), 1, fp);
-    fread(&net->momentum, sizeof(float), 1, fp);
-    fread(&net->decay, sizeof(float), 1, fp);
-    fread(&net->seen, sizeof(int), 1, fp);
-    set_learning_network(net, net->learning_rate, net->momentum, net->decay);
-    
-    int i;
-    for(i = 0; i < net->n; ++i){
-        if(net->types[i] == CONVOLUTIONAL){
-            convolutional_layer layer = *(convolutional_layer *) net->layers[i];
-            int num = layer.n*layer.c*layer.size*layer.size;
-            fread(layer.biases, sizeof(float), layer.n, fp);
-            fread(layer.filters, sizeof(float), num, fp);
-            #ifdef GPU
-            if(gpu_index >= 0){
-                push_convolutional_layer(layer);
-            }
-            #endif
-        }
-        if(net->types[i] == DECONVOLUTIONAL){
-            deconvolutional_layer layer = *(deconvolutional_layer *) net->layers[i];
-            int num = layer.n*layer.c*layer.size*layer.size;
-            fread(layer.biases, sizeof(float), layer.n, fp);
-            fread(layer.filters, sizeof(float), num, fp);
-            #ifdef GPU
-            if(gpu_index >= 0){
-                push_deconvolutional_layer(layer);
-            }
-            #endif
-        }
-        if(net->types[i] == CONNECTED){
-            connected_layer layer = *(connected_layer *) net->layers[i];
-            fread(layer.biases, sizeof(float), layer.outputs, fp);
-            fread(layer.weights, sizeof(float), layer.outputs*layer.inputs, fp);
-            #ifdef GPU
-            if(gpu_index >= 0){
-                push_connected_layer(layer);
-            }
-            #endif
-        }
-    }
-    fclose(fp);
-}
-
-void save_network(network net, char *filename)
-{
-    FILE *fp = fopen(filename, "w");
-    if(!fp) file_error(filename);
-    int i;
-    for(i = 0; i < net.n; ++i)
-    {
-        if(net.types[i] == CONVOLUTIONAL)
-            print_convolutional_cfg(fp, (convolutional_layer *)net.layers[i], net, i);
-        else if(net.types[i] == DECONVOLUTIONAL)
-            print_deconvolutional_cfg(fp, (deconvolutional_layer *)net.layers[i], net, i);
-        else if(net.types[i] == CONNECTED)
-            print_connected_cfg(fp, (connected_layer *)net.layers[i], net, i);
-        else if(net.types[i] == CROP)
-            print_crop_cfg(fp, (crop_layer *)net.layers[i], net, i);
-        else if(net.types[i] == MAXPOOL)
-            print_maxpool_cfg(fp, (maxpool_layer *)net.layers[i], net, i);
-        else if(net.types[i] == FREEWEIGHT)
-            print_freeweight_cfg(fp, (freeweight_layer *)net.layers[i], net, i);
-        else if(net.types[i] == DROPOUT)
-            print_dropout_cfg(fp, (dropout_layer *)net.layers[i], net, i);
-        else if(net.types[i] == NORMALIZATION)
-            print_normalization_cfg(fp, (normalization_layer *)net.layers[i], net, i);
-        else if(net.types[i] == SOFTMAX)
-            print_softmax_cfg(fp, (softmax_layer *)net.layers[i], net, i);
-        else if(net.types[i] == COST)
-            print_cost_cfg(fp, (cost_layer *)net.layers[i], net, i);
-    }
-    fclose(fp);
+    load_weights_upto(net, filename, net->n); /* thin wrapper: delegates to load_weights_upto, passing net->n as the third argument (presumably the layer cutoff, i.e. load all layers -- confirm in load_weights_upto) */
 }
 

--
Gitblit v1.10.0