From 1b5afb45838e603fa6780762eb8cc59246dc2d81 Mon Sep 17 00:00:00 2001
From: IlyaOvodov <b@ovdv.ru>
Date: Tue, 08 May 2018 11:09:35 +0000
Subject: [PATCH] Output improvements for detector results: When printing detector results, output was done in random order, making the results hard to interpret. Now: 1. Text output includes the coordinates of rects (left, right, top, bottom in pixels) along with label and score 2. Text output is sorted by the rects' left edges to simplify finding the appropriate rects in the image 3. If several class probs are > thresh for some detection, the most probable is written first and the coordinates are not repeated for the others 4. Rects are drawn on the image in order of their best class prob, so the most probable rects are always on top and not overlaid by less probable ones 5. The most probable label for a rect is always written first Also: 6. The message about low GPU memory includes the required amount
---
src/parser.c | 1052 +++++++++++++++++++++++++++++++++++++++++++++------------
1 files changed, 826 insertions(+), 226 deletions(-)
diff --git a/src/parser.c b/src/parser.c
index 5591dc3..7441ae2 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -2,44 +2,82 @@
#include <string.h>
#include <stdlib.h>
-#include "parser.h"
+#include "activation_layer.h"
#include "activations.h"
-#include "crop_layer.h"
-#include "cost_layer.h"
-#include "convolutional_layer.h"
-#include "normalization_layer.h"
-#include "deconvolutional_layer.h"
-#include "connected_layer.h"
-#include "maxpool_layer.h"
-#include "softmax_layer.h"
-#include "dropout_layer.h"
-#include "detection_layer.h"
+#include "assert.h"
#include "avgpool_layer.h"
-#include "route_layer.h"
+#include "batchnorm_layer.h"
+#include "blas.h"
+#include "connected_layer.h"
+#include "convolutional_layer.h"
+#include "cost_layer.h"
+#include "crnn_layer.h"
+#include "crop_layer.h"
+#include "detection_layer.h"
+#include "dropout_layer.h"
+#include "gru_layer.h"
#include "list.h"
+#include "local_layer.h"
+#include "maxpool_layer.h"
+#include "normalization_layer.h"
#include "option_list.h"
+#include "parser.h"
+#include "region_layer.h"
+#include "reorg_layer.h"
+#include "reorg_old_layer.h"
+#include "rnn_layer.h"
+#include "route_layer.h"
+#include "shortcut_layer.h"
+#include "softmax_layer.h"
#include "utils.h"
+#include "upsample_layer.h"
+#include "yolo_layer.h"
+#include <stdint.h>
typedef struct{
char *type;
list *options;
}section;
-int is_network(section *s);
-int is_convolutional(section *s);
-int is_deconvolutional(section *s);
-int is_connected(section *s);
-int is_maxpool(section *s);
-int is_avgpool(section *s);
-int is_dropout(section *s);
-int is_softmax(section *s);
-int is_normalization(section *s);
-int is_crop(section *s);
-int is_cost(section *s);
-int is_detection(section *s);
-int is_route(section *s);
list *read_cfg(char *filename);
+LAYER_TYPE string_to_layer_type(char * type)
+{
+ // Map a cfg section header (brackets included, e.g. "[conv]") to its LAYER_TYPE. Several types accept both a short and a long spelling; an unrecognized header falls through to BLANK.
+ if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
+ if (strcmp(type, "[crop]")==0) return CROP;
+ if (strcmp(type, "[cost]")==0) return COST;
+ if (strcmp(type, "[detection]")==0) return DETECTION;
+ if (strcmp(type, "[region]")==0) return REGION;
+ if (strcmp(type, "[yolo]") == 0) return YOLO;
+ if (strcmp(type, "[local]")==0) return LOCAL;
+ if (strcmp(type, "[conv]")==0
+ || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
+ if (strcmp(type, "[activation]")==0) return ACTIVE;
+ if (strcmp(type, "[net]")==0
+ || strcmp(type, "[network]")==0) return NETWORK;
+ if (strcmp(type, "[crnn]")==0) return CRNN;
+ if (strcmp(type, "[gru]")==0) return GRU;
+ if (strcmp(type, "[rnn]")==0) return RNN;
+ if (strcmp(type, "[conn]")==0
+ || strcmp(type, "[connected]")==0) return CONNECTED;
+ if (strcmp(type, "[max]")==0
+ || strcmp(type, "[maxpool]")==0) return MAXPOOL;
+ if (strcmp(type, "[reorg]")==0) return REORG;
+ if (strcmp(type, "[reorg_old]") == 0) return REORG_OLD;
+ if (strcmp(type, "[avg]")==0
+ || strcmp(type, "[avgpool]")==0) return AVGPOOL;
+ if (strcmp(type, "[dropout]")==0) return DROPOUT;
+ if (strcmp(type, "[lrn]")==0
+ || strcmp(type, "[normalization]")==0) return NORMALIZATION;
+ if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
+ if (strcmp(type, "[soft]")==0
+ || strcmp(type, "[softmax]")==0) return SOFTMAX;
+ if (strcmp(type, "[route]")==0) return ROUTE;
+ if (strcmp(type, "[upsample]") == 0) return UPSAMPLE;
+ return BLANK; // no known header matched
+}
+
void free_section(section *s)
{
free(s->type);
@@ -78,36 +116,12 @@
int h;
int w;
int c;
+ int index;
+ int time_steps;
+ network net;
} size_params;
-deconvolutional_layer parse_deconvolutional(list *options, size_params params)
-{
- int n = option_find_int(options, "filters",1);
- int size = option_find_int(options, "size",1);
- int stride = option_find_int(options, "stride",1);
- char *activation_s = option_find_str(options, "activation", "logistic");
- ACTIVATION activation = get_activation(activation_s);
-
- int batch,h,w,c;
- h = params.h;
- w = params.w;
- c = params.c;
- batch=params.batch;
- if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");
-
- deconvolutional_layer layer = make_deconvolutional_layer(batch,h,w,c,n,size,stride,activation);
-
- char *weights = option_find_str(options, "weights", 0);
- char *biases = option_find_str(options, "biases", 0);
- parse_data(weights, layer.filters, c*n*size*size);
- parse_data(biases, layer.biases, n);
- #ifdef GPU
- if(weights || biases) push_deconvolutional_layer(layer);
- #endif
- return layer;
-}
-
-convolutional_layer parse_convolutional(list *options, size_params params)
+local_layer parse_local(list *options, size_params params)
{
int n = option_find_int(options, "filters",1);
int size = option_find_int(options, "size",1);
@@ -121,54 +135,244 @@
w = params.w;
c = params.c;
batch=params.batch;
- if(!(h && w && c)) error("Layer before convolutional layer must output image.");
+ if(!(h && w && c)) error("Layer before local layer must output image.");
- convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation);
+ local_layer layer = make_local_layer(batch,h,w,c,n,size,stride,pad,activation);
- char *weights = option_find_str(options, "weights", 0);
- char *biases = option_find_str(options, "biases", 0);
- parse_data(weights, layer.filters, c*n*size*size);
- parse_data(biases, layer.biases, n);
- #ifdef GPU
- if(weights || biases) push_convolutional_layer(layer);
- #endif
return layer;
}
+convolutional_layer parse_convolutional(list *options, size_params params)
+{ // Builds a convolutional layer from one cfg section's options; params supplies batch and the incoming h/w/c.
+ int n = option_find_int(options, "filters",1);
+ int size = option_find_int(options, "size",1);
+ int stride = option_find_int(options, "stride",1);
+ int pad = option_find_int_quiet(options, "pad",0);
+ int padding = option_find_int_quiet(options, "padding",0);
+ if(pad) padding = size/2; // a non-zero "pad" overrides any explicit "padding" with size/2
+
+ char *activation_s = option_find_str(options, "activation", "logistic");
+ ACTIVATION activation = get_activation(activation_s);
+
+ int batch,h,w,c;
+ h = params.h;
+ w = params.w;
+ c = params.c;
+ batch=params.batch;
+ if(!(h && w && c)) error("Layer before convolutional layer must output image."); // any zero dimension is rejected
+ int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+ int binary = option_find_int_quiet(options, "binary", 0);
+ int xnor = option_find_int_quiet(options, "xnor", 0);
+
+ convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,padding,activation, batch_normalize, binary, xnor, params.net.adam);
+ layer.flipped = option_find_int_quiet(options, "flipped", 0);
+ layer.dot = option_find_float_quiet(options, "dot", 0);
+ if(params.net.adam){ // when adam is enabled, copy the network-level B1/B2/eps onto the layer
+ layer.B1 = params.net.B1;
+ layer.B2 = params.net.B2;
+ layer.eps = params.net.eps;
+ }
+
+ return layer;
+}
+
+layer parse_crnn(list *options, size_params params)
+{ // Builds a CRNN layer from the "output_filters", "hidden_filters", "activation" and "batch_normalize" options.
+ int output_filters = option_find_int(options, "output_filters",1);
+ int hidden_filters = option_find_int(options, "hidden_filters",1);
+ char *activation_s = option_find_str(options, "activation", "logistic");
+ ACTIVATION activation = get_activation(activation_s);
+ int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+
+ layer l = make_crnn_layer(params.batch, params.w, params.h, params.c, hidden_filters, output_filters, params.time_steps, activation, batch_normalize); // note: hidden_filters is passed before output_filters
+
+ l.shortcut = option_find_int_quiet(options, "shortcut", 0); // optional flag, defaults to 0
+
+ return l;
+}
+
+layer parse_rnn(list *options, size_params params)
+{ // Builds an RNN layer from the "output", "hidden", "activation", "batch_normalize" and "logistic" options.
+ int output = option_find_int(options, "output",1);
+ int hidden = option_find_int(options, "hidden",1);
+ char *activation_s = option_find_str(options, "activation", "logistic");
+ ACTIVATION activation = get_activation(activation_s);
+ int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+ int logistic = option_find_int_quiet(options, "logistic", 0);
+
+ layer l = make_rnn_layer(params.batch, params.inputs, hidden, output, params.time_steps, activation, batch_normalize, logistic); // sized from the flat input count, not h/w/c
+
+ l.shortcut = option_find_int_quiet(options, "shortcut", 0); // optional flag, defaults to 0
+
+ return l;
+}
+
+layer parse_gru(list *options, size_params params)
+{ // Builds a GRU layer from the "output" and "batch_normalize" options; no activation option is read here.
+ int output = option_find_int(options, "output",1);
+ int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+
+ layer l = make_gru_layer(params.batch, params.inputs, output, params.time_steps, batch_normalize);
+
+ return l;
+}
+
connected_layer parse_connected(list *options, size_params params)
{
int output = option_find_int(options, "output",1);
char *activation_s = option_find_str(options, "activation", "logistic");
ACTIVATION activation = get_activation(activation_s);
+ int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
- connected_layer layer = make_connected_layer(params.batch, params.inputs, output, activation);
+ connected_layer layer = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize);
- char *weights = option_find_str(options, "weights", 0);
- char *biases = option_find_str(options, "biases", 0);
- parse_data(biases, layer.biases, output);
- parse_data(weights, layer.weights, params.inputs*output);
- #ifdef GPU
- if(weights || biases) push_connected_layer(layer);
- #endif
return layer;
}
softmax_layer parse_softmax(list *options, size_params params)
{
- int groups = option_find_int(options, "groups",1);
+ int groups = option_find_int_quiet(options, "groups",1);
softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups);
+ layer.temperature = option_find_float_quiet(options, "temperature", 1);
+ char *tree_file = option_find_str(options, "tree", 0);
+ if (tree_file) layer.softmax_tree = read_tree(tree_file);
return layer;
}
+// Parse a comma-separated list of integers into a calloc'd array; *num receives the entry count. Returns 0 and leaves *num untouched when a is NULL.
+int *parse_yolo_mask(char *a, int *num)
+{
+    int *mask = 0;
+    if (a) {
+        int len = strlen(a);
+        int i, n = 1;                       // n = number of commas + 1
+        for (i = 0; i < len; ++i) {
+            if (a[i] == ',') ++n;
+        }
+        mask = calloc(n, sizeof(int));
+        for (i = 0; a && i < n; ++i) {
+            mask[i] = atoi(a);
+            a = strchr(a, ',');             // NULL after the last entry
+            if (a) ++a;                     // only step past a comma that exists (no NULL + 1)
+        }
+        *num = n;
+    }
+    return mask;
+}
+
+// Builds a yolo layer from a cfg section: reads classes/num/mask/max, then the loss options, the optional class map and the anchor biases.
+layer parse_yolo(list *options, size_params params)
+{
+    int classes = option_find_int(options, "classes", 20);
+    int total = option_find_int(options, "num", 1);
+    int num = total;                        // replaced by the mask length when a "mask" option is present
+
+    char *a = option_find_str(options, "mask", 0);
+    int *mask = parse_yolo_mask(a, &num);
+    int max_boxes = option_find_int_quiet(options, "max", 30);
+    layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes);
+    assert(l.outputs == params.inputs);     // the layer must consume exactly what the previous layer produced
+
+    l.jitter = option_find_float(options, "jitter", .2);
+    l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
+
+    l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
+    l.truth_thresh = option_find_float(options, "truth_thresh", 1);
+    l.random = option_find_int_quiet(options, "random", 0);
+
+    char *map_file = option_find_str(options, "map", 0);
+    if (map_file) l.map = read_map(map_file);
+
+    a = option_find_str(options, "anchors", 0);
+    if (a) {
+        int len = strlen(a);
+        int n = 1;                          // n = number of commas + 1
+        int i;
+        for (i = 0; i < len; ++i) {
+            if (a[i] == ',') ++n;
+        }
+        for (i = 0; a && i < n && i < total*2; ++i) {   // never store more than total*2 biases
+            l.biases[i] = atof(a);
+            a = strchr(a, ',');             // NULL after the last anchor value
+            if (a) ++a;                     // only step past a comma that exists (no NULL + 1)
+        }
+    }
+    return l;
+}
+
+// Builds a region layer from a cfg section: reads coords/classes/num/max, the loss options, the optional tree and map files, and the anchor biases.
+layer parse_region(list *options, size_params params)
+{
+    int coords = option_find_int(options, "coords", 4);
+    int classes = option_find_int(options, "classes", 20);
+    int num = option_find_int(options, "num", 1);
+    int max_boxes = option_find_int_quiet(options, "max", 30);
+
+    layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes);
+    assert(l.outputs == params.inputs);     // the layer must consume exactly what the previous layer produced
+
+    l.log = option_find_int_quiet(options, "log", 0);
+    l.sqrt = option_find_int_quiet(options, "sqrt", 0);
+
+    l.softmax = option_find_int(options, "softmax", 0);
+    l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
+    l.jitter = option_find_float(options, "jitter", .2);
+    l.rescore = option_find_int_quiet(options, "rescore",0);
+
+    l.thresh = option_find_float(options, "thresh", .5);
+    l.classfix = option_find_int_quiet(options, "classfix", 0);
+    l.absolute = option_find_int_quiet(options, "absolute", 0);
+    l.random = option_find_int_quiet(options, "random", 0);
+
+    l.coord_scale = option_find_float(options, "coord_scale", 1);
+    l.object_scale = option_find_float(options, "object_scale", 1);
+    l.noobject_scale = option_find_float(options, "noobject_scale", 1);
+    l.mask_scale = option_find_float(options, "mask_scale", 1);
+    l.class_scale = option_find_float(options, "class_scale", 1);
+    l.bias_match = option_find_int_quiet(options, "bias_match",0);
+
+    char *tree_file = option_find_str(options, "tree", 0);
+    if (tree_file) l.softmax_tree = read_tree(tree_file);
+    char *map_file = option_find_str(options, "map", 0);
+    if (map_file) l.map = read_map(map_file);
+
+    char *a = option_find_str(options, "anchors", 0);
+    if(a){
+        int len = strlen(a);
+        int n = 1;                          // n = number of commas + 1
+        int i;
+        for(i = 0; i < len; ++i){
+            if (a[i] == ',') ++n;
+        }
+        for(i = 0; a && i < n && i < num*2; ++i){   // never store more than num*2 biases
+            l.biases[i] = atof(a);
+            a = strchr(a, ',');             // NULL after the last anchor value
+            if (a) ++a;                     // only step past a comma that exists (no NULL + 1)
+        }
+    }
+    return l;
+}
detection_layer parse_detection(list *options, size_params params)
{
int coords = option_find_int(options, "coords", 1);
int classes = option_find_int(options, "classes", 1);
int rescore = option_find_int(options, "rescore", 0);
- int joint = option_find_int(options, "joint", 0);
- int objectness = option_find_int(options, "objectness", 0);
- int background = 0;
- detection_layer layer = make_detection_layer(params.batch, params.inputs, classes, coords, joint, rescore, background, objectness);
+ int num = option_find_int(options, "num", 1);
+ int side = option_find_int(options, "side", 7);
+ detection_layer layer = make_detection_layer(params.batch, params.inputs, num, side, classes, coords, rescore);
+
+ layer.softmax = option_find_int(options, "softmax", 0);
+ layer.sqrt = option_find_int(options, "sqrt", 0);
+
+ layer.max_boxes = option_find_int_quiet(options, "max",30);
+ layer.coord_scale = option_find_float(options, "coord_scale", 1);
+ layer.forced = option_find_int(options, "forced", 0);
+ layer.object_scale = option_find_float(options, "object_scale", 1);
+ layer.noobject_scale = option_find_float(options, "noobject_scale", 1);
+ layer.class_scale = option_find_float(options, "class_scale", 1);
+ layer.jitter = option_find_float(options, "jitter", .2);
+ layer.random = option_find_int_quiet(options, "random", 0);
+ layer.reorg = option_find_int_quiet(options, "reorg", 0);
return layer;
}
@@ -176,7 +380,9 @@
{
char *type_s = option_find_str(options, "type", "sse");
COST_TYPE type = get_cost_type(type_s);
- cost_layer layer = make_cost_layer(params.batch, params.inputs, type);
+ float scale = option_find_float_quiet(options, "scale",1);
+ cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
+ layer.ratio = option_find_float_quiet(options, "ratio",0);
return layer;
}
@@ -199,14 +405,49 @@
int noadjust = option_find_int_quiet(options, "noadjust",0);
crop_layer l = make_crop_layer(batch,h,w,c,crop_height,crop_width,flip, angle, saturation, exposure);
+ l.shift = option_find_float(options, "shift", 0);
l.noadjust = noadjust;
return l;
}
+layer parse_reorg(list *options, size_params params)
+{ // Builds a reorg layer from the "stride" and "reverse" options; the incoming h/w/c must all be non-zero.
+ int stride = option_find_int(options, "stride",1);
+ int reverse = option_find_int_quiet(options, "reverse",0);
+
+ int batch,h,w,c;
+ h = params.h;
+ w = params.w;
+ c = params.c;
+ batch=params.batch;
+ if(!(h && w && c)) error("Layer before reorg layer must output image.");
+
+ layer layer = make_reorg_layer(batch,w,h,c,stride,reverse); // note: width is passed before height
+ return layer;
+}
+
+layer parse_reorg_old(list *options, size_params params)
+{ // Same option handling as parse_reorg, but constructs the layer with make_reorg_old_layer.
+ printf("\n reorg_old \n"); // announces on stdout that the old reorg variant is in use
+ int stride = option_find_int(options, "stride", 1);
+ int reverse = option_find_int_quiet(options, "reverse", 0);
+
+ int batch, h, w, c;
+ h = params.h;
+ w = params.w;
+ c = params.c;
+ batch = params.batch;
+ if (!(h && w && c)) error("Layer before reorg layer must output image.");
+
+ layer layer = make_reorg_old_layer(batch, w, h, c, stride, reverse); // note: width is passed before height
+ return layer;
+}
+
maxpool_layer parse_maxpool(list *options, size_params params)
{
int stride = option_find_int(options, "stride",1);
int size = option_find_int(options, "size",stride);
+ int padding = option_find_int_quiet(options, "padding", (size-1)/2);
int batch,h,w,c;
h = params.h;
@@ -215,7 +456,7 @@
batch=params.batch;
if(!(h && w && c)) error("Layer before maxpool layer must output image.");
- maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride);
+ maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding);
return layer;
}
@@ -236,6 +477,9 @@
{
float probability = option_find_float(options, "probability", .5);
dropout_layer layer = make_dropout_layer(params.batch, params.inputs, probability);
+ layer.out_w = params.w;
+ layer.out_h = params.h;
+ layer.out_c = params.c;
return layer;
}
@@ -249,6 +493,56 @@
return l;
}
+layer parse_batchnorm(list *options, size_params params)
+{ // Builds a batchnorm layer sized from params (batch, w, h, c); options is accepted but not read.
+ layer l = make_batchnorm_layer(params.batch, params.w, params.h, params.c);
+ return l;
+}
+
+// Builds a shortcut layer that combines the current input with the output of an earlier layer named by "from" (negative values are relative to this layer).
+layer parse_shortcut(list *options, size_params params, network net)
+{
+    char *l = option_find(options, "from");
+    if(!l) error("Shortcut layer must specify from layer");   // atoi(NULL) would crash otherwise
+    int index = atoi(l);
+    if(index < 0) index = params.index + index;               // relative -> absolute layer index
+    if(index < 0 || index >= params.index) error("Shortcut layer from index out of range");   // only already-parsed layers are valid sources
+
+    layer from = net.layers[index];
+    layer s = make_shortcut_layer(params.batch, index, params.w, params.h, params.c, from.out_w, from.out_h, from.out_c);
+
+    char *activation_s = option_find_str(options, "activation", "linear");
+    s.activation = get_activation(activation_s);
+    return s;
+}
+
+
+layer parse_activation(list *options, size_params params)
+{ // Builds an activation layer ("activation" option, default "linear") over params.inputs values.
+ char *activation_s = option_find_str(options, "activation", "linear");
+ ACTIVATION activation = get_activation(activation_s);
+
+ layer l = make_activation_layer(params.batch, params.inputs, activation);
+
+ l.out_h = params.h; // the incoming h/w/c are copied to both the output and the input dimensions
+ l.out_w = params.w;
+ l.out_c = params.c;
+ l.h = params.h;
+ l.w = params.w;
+ l.c = params.c;
+
+ return l;
+}
+
+layer parse_upsample(list *options, size_params params, network net)
+{
+ // Builds an upsample layer from "stride" (default 2), then sets the optional "scale" (default 1). The net argument is not used in this body.
+ int stride = option_find_int(options, "stride", 2);
+ layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride);
+ l.scale = option_find_float_quiet(options, "scale", 1);
+ return l;
+}
+
route_layer parse_route(list *options, size_params params, network net)
{
char *l = option_find(options, "layers");
@@ -265,13 +559,14 @@
for(i = 0; i < n; ++i){
int index = atoi(l);
l = strchr(l, ',')+1;
+ if(index < 0) index = params.index + index;
layers[i] = index;
sizes[i] = net.layers[index].outputs;
}
int batch = params.batch;
route_layer layer = make_route_layer(batch, n, layers, sizes);
-
+
convolutional_layer first = net.layers[layers[0]];
layer.out_w = first.out_w;
layer.out_h = first.out_h;
@@ -289,6 +584,19 @@
return layer;
}
+learning_rate_policy get_policy(char *s)
+{ // Maps a policy name to its learning_rate_policy value by exact string comparison.
+ if (strcmp(s, "random")==0) return RANDOM;
+ if (strcmp(s, "poly")==0) return POLY;
+ if (strcmp(s, "constant")==0) return CONSTANT;
+ if (strcmp(s, "step")==0) return STEP;
+ if (strcmp(s, "exp")==0) return EXP;
+ if (strcmp(s, "sigmoid")==0) return SIG;
+ if (strcmp(s, "steps")==0) return STEPS;
+ fprintf(stderr, "Couldn't find policy %s, going with constant\n", s); // unknown names are reported, not fatal
+ return CONSTANT;
+}
+
void parse_net_options(list *options, network *net)
{
net->batch = option_find_int(options, "batch",1);
@@ -296,22 +604,98 @@
net->momentum = option_find_float(options, "momentum", .9);
net->decay = option_find_float(options, "decay", .0001);
int subdivs = option_find_int(options, "subdivisions",1);
+ net->time_steps = option_find_int_quiet(options, "time_steps",1);
net->batch /= subdivs;
+ net->batch *= net->time_steps;
net->subdivisions = subdivs;
+ net->adam = option_find_int_quiet(options, "adam", 0);
+ if(net->adam){
+ net->B1 = option_find_float(options, "B1", .9);
+ net->B2 = option_find_float(options, "B2", .999);
+ net->eps = option_find_float(options, "eps", .000001);
+ }
+
net->h = option_find_int_quiet(options, "height",0);
net->w = option_find_int_quiet(options, "width",0);
net->c = option_find_int_quiet(options, "channels",0);
net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
+ net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
+ net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
+ net->flip = option_find_int_quiet(options, "flip", 1);
+
+ net->small_object = option_find_int_quiet(options, "small_object", 0);
+ net->angle = option_find_float_quiet(options, "angle", 0);
+ net->aspect = option_find_float_quiet(options, "aspect", 1);
+ net->saturation = option_find_float_quiet(options, "saturation", 1);
+ net->exposure = option_find_float_quiet(options, "exposure", 1);
+ net->hue = option_find_float_quiet(options, "hue", 0);
+ net->power = option_find_float_quiet(options, "power", 4);
+
if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
+
+ char *policy_s = option_find_str(options, "policy", "constant");
+ net->policy = get_policy(policy_s);
+ net->burn_in = option_find_int_quiet(options, "burn_in", 0);
+#ifdef CUDNN_HALF
+ net->burn_in = 0;
+#endif
+ if(net->policy == STEP){
+ net->step = option_find_int(options, "step", 1);
+ net->scale = option_find_float(options, "scale", 1);
+ } else if (net->policy == STEPS){
+ char *l = option_find(options, "steps");
+ char *p = option_find(options, "scales");
+ if(!l || !p) error("STEPS policy must have steps and scales in cfg file");
+
+ int len = strlen(l);
+ int n = 1;
+ int i;
+ for(i = 0; i < len; ++i){
+ if (l[i] == ',') ++n;
+ }
+ int *steps = calloc(n, sizeof(int));
+ float *scales = calloc(n, sizeof(float));
+ for(i = 0; i < n; ++i){
+ int step = atoi(l);
+ float scale = atof(p);
+ l = strchr(l, ',')+1;
+ p = strchr(p, ',')+1;
+ steps[i] = step;
+ scales[i] = scale;
+ }
+ net->scales = scales;
+ net->steps = steps;
+ net->num_steps = n;
+ } else if (net->policy == EXP){
+ net->gamma = option_find_float(options, "gamma", 1);
+ } else if (net->policy == SIG){
+ net->gamma = option_find_float(options, "gamma", 1);
+ net->step = option_find_int(options, "step", 1);
+ } else if (net->policy == POLY || net->policy == RANDOM){
+ //net->power = option_find_float(options, "power", 1);
+ }
+ net->max_batches = option_find_int(options, "max_batches", 0);
+}
+
+int is_network(section *s)
+{
+ return (strcmp(s->type, "[net]")==0
+ || strcmp(s->type, "[network]")==0);
}
network parse_network_cfg(char *filename)
{
+ return parse_network_cfg_custom(filename, 0);
+}
+
+network parse_network_cfg_custom(char *filename, int batch)
+{
list *sections = read_cfg(filename);
node *n = sections->front;
if(!n) error("Config file has no sections");
network net = make_network(sections->size - 1);
+ net.gpu_index = gpu_index;
size_params params;
section *s = (section *)n->val;
@@ -323,147 +707,115 @@
params.w = net.w;
params.c = net.c;
params.inputs = net.inputs;
+ if (batch > 0) net.batch = batch;
params.batch = net.batch;
+ params.time_steps = net.time_steps;
+ params.net = net;
+ size_t workspace_size = 0;
n = n->next;
int count = 0;
+ free_section(s);
+ fprintf(stderr, "layer filters size input output\n");
while(n){
- fprintf(stderr, "%d: ", count);
+ params.index = count;
+ fprintf(stderr, "%5d ", count);
s = (section *)n->val;
options = s->options;
layer l = {0};
- if(is_convolutional(s)){
+ LAYER_TYPE lt = string_to_layer_type(s->type);
+ if(lt == CONVOLUTIONAL){
l = parse_convolutional(options, params);
- }else if(is_deconvolutional(s)){
- l = parse_deconvolutional(options, params);
- }else if(is_connected(s)){
+ }else if(lt == LOCAL){
+ l = parse_local(options, params);
+ }else if(lt == ACTIVE){
+ l = parse_activation(options, params);
+ }else if(lt == RNN){
+ l = parse_rnn(options, params);
+ }else if(lt == GRU){
+ l = parse_gru(options, params);
+ }else if(lt == CRNN){
+ l = parse_crnn(options, params);
+ }else if(lt == CONNECTED){
l = parse_connected(options, params);
- }else if(is_crop(s)){
+ }else if(lt == CROP){
l = parse_crop(options, params);
- }else if(is_cost(s)){
+ }else if(lt == COST){
l = parse_cost(options, params);
- }else if(is_detection(s)){
+ }else if(lt == REGION){
+ l = parse_region(options, params);
+ }else if (lt == YOLO) {
+ l = parse_yolo(options, params);
+ }else if(lt == DETECTION){
l = parse_detection(options, params);
- }else if(is_softmax(s)){
+ }else if(lt == SOFTMAX){
l = parse_softmax(options, params);
- }else if(is_normalization(s)){
+ net.hierarchy = l.softmax_tree;
+ }else if(lt == NORMALIZATION){
l = parse_normalization(options, params);
- }else if(is_maxpool(s)){
+ }else if(lt == BATCHNORM){
+ l = parse_batchnorm(options, params);
+ }else if(lt == MAXPOOL){
l = parse_maxpool(options, params);
- }else if(is_avgpool(s)){
+ }else if(lt == REORG){
+ l = parse_reorg(options, params); }
+ else if (lt == REORG_OLD) {
+ l = parse_reorg_old(options, params);
+ }else if(lt == AVGPOOL){
l = parse_avgpool(options, params);
- }else if(is_route(s)){
+ }else if(lt == ROUTE){
l = parse_route(options, params, net);
- }else if(is_dropout(s)){
+ }else if (lt == UPSAMPLE) {
+ l = parse_upsample(options, params, net);
+ }else if(lt == SHORTCUT){
+ l = parse_shortcut(options, params, net);
+ }else if(lt == DROPOUT){
l = parse_dropout(options, params);
l.output = net.layers[count-1].output;
l.delta = net.layers[count-1].delta;
- #ifdef GPU
+#ifdef GPU
l.output_gpu = net.layers[count-1].output_gpu;
l.delta_gpu = net.layers[count-1].delta_gpu;
- #endif
+#endif
}else{
fprintf(stderr, "Type not recognized: %s\n", s->type);
}
+ l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
+ l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
l.dontload = option_find_int_quiet(options, "dontload", 0);
+ l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
option_unused(options);
net.layers[count] = l;
+ if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
free_section(s);
n = n->next;
+ ++count;
if(n){
params.h = l.out_h;
params.w = l.out_w;
params.c = l.out_c;
params.inputs = l.outputs;
}
- ++count;
}
free_list(sections);
net.outputs = get_network_output_size(net);
net.output = get_network_output(net);
+ if(workspace_size){
+ //printf("%ld\n", workspace_size);
+#ifdef GPU
+ if(gpu_index >= 0){
+ net.workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
+ }else {
+ net.workspace = calloc(1, workspace_size);
+ }
+#else
+ net.workspace = calloc(1, workspace_size);
+#endif
+ }
return net;
}
-int is_crop(section *s)
-{
- return (strcmp(s->type, "[crop]")==0);
-}
-int is_cost(section *s)
-{
- return (strcmp(s->type, "[cost]")==0);
-}
-int is_detection(section *s)
-{
- return (strcmp(s->type, "[detection]")==0);
-}
-int is_deconvolutional(section *s)
-{
- return (strcmp(s->type, "[deconv]")==0
- || strcmp(s->type, "[deconvolutional]")==0);
-}
-int is_convolutional(section *s)
-{
- return (strcmp(s->type, "[conv]")==0
- || strcmp(s->type, "[convolutional]")==0);
-}
-int is_network(section *s)
-{
- return (strcmp(s->type, "[net]")==0
- || strcmp(s->type, "[network]")==0);
-}
-int is_connected(section *s)
-{
- return (strcmp(s->type, "[conn]")==0
- || strcmp(s->type, "[connected]")==0);
-}
-int is_maxpool(section *s)
-{
- return (strcmp(s->type, "[max]")==0
- || strcmp(s->type, "[maxpool]")==0);
-}
-int is_avgpool(section *s)
-{
- return (strcmp(s->type, "[avg]")==0
- || strcmp(s->type, "[avgpool]")==0);
-}
-int is_dropout(section *s)
-{
- return (strcmp(s->type, "[dropout]")==0);
-}
-int is_normalization(section *s)
-{
- return (strcmp(s->type, "[lrn]")==0
- || strcmp(s->type, "[normalization]")==0);
-}
-
-int is_softmax(section *s)
-{
- return (strcmp(s->type, "[soft]")==0
- || strcmp(s->type, "[softmax]")==0);
-}
-int is_route(section *s)
-{
- return (strcmp(s->type, "[route]")==0);
-}
-
-int read_option(char *s, list *options)
-{
- size_t i;
- size_t len = strlen(s);
- char *val = 0;
- for(i = 0; i < len; ++i){
- if(s[i] == '='){
- s[i] = '\0';
- val = s+i+1;
- break;
- }
- }
- if(i == len-1) return 0;
- char *key = s;
- option_insert(options, key, val);
- return 1;
-}
list *read_cfg(char *filename)
{
@@ -500,48 +852,144 @@
return sections;
}
+void save_convolutional_weights_binary(layer l, FILE *fp)
+{ // Writes biases, optional batch-norm statistics, then for each filter one float magnitude followed by sign bits packed 8 weights per byte.
+#ifdef GPU
+ if(gpu_index >= 0){
+ pull_convolutional_layer(l);
+ }
+#endif
+ binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights);
+ int size = l.c*l.size*l.size; // weights per filter
+ int i, j, k;
+ fwrite(l.biases, sizeof(float), l.n, fp);
+ if (l.batch_normalize){
+ fwrite(l.scales, sizeof(float), l.n, fp);
+ fwrite(l.rolling_mean, sizeof(float), l.n, fp);
+ fwrite(l.rolling_variance, sizeof(float), l.n, fp);
+ }
+ for(i = 0; i < l.n; ++i){
+ float mean = l.binary_weights[i*size]; // first binarized weight of filter i; its absolute value is stored
+ if(mean < 0) mean = -mean;
+ fwrite(&mean, sizeof(float), 1, fp);
+ for(j = 0; j < size/8; ++j){ // NOTE(review): integer division means a trailing size%8 weights are never written - confirm the loader expects this
+ int index = i*size + j*8;
+ unsigned char c = 0;
+ for(k = 0; k < 8; ++k){
+ if (j*8 + k >= size) break; // cannot trigger while j < size/8
+ if (l.binary_weights[index + k] > 0) c = (c | 1<<k); // bit k is set when weight k of this byte is positive
+ }
+ fwrite(&c, sizeof(char), 1, fp);
+ }
+ }
+}
+
+void save_convolutional_weights(layer l, FILE *fp)
+{ // Writes, in order: biases, optional batch-norm scales/rolling mean/rolling variance, the weights, and (when l.adam is set) the m and v buffers.
+ if(l.binary){ // currently a no-op: the packed binary writer call is commented out
+ //save_convolutional_weights_binary(l, fp);
+ //return;
+ }
+#ifdef GPU
+ if(gpu_index >= 0){
+ pull_convolutional_layer(l);
+ }
+#endif
+ int num = l.n*l.c*l.size*l.size; // total weight count across all l.n filters
+ fwrite(l.biases, sizeof(float), l.n, fp);
+ if (l.batch_normalize){
+ fwrite(l.scales, sizeof(float), l.n, fp);
+ fwrite(l.rolling_mean, sizeof(float), l.n, fp);
+ fwrite(l.rolling_variance, sizeof(float), l.n, fp);
+ }
+ fwrite(l.weights, sizeof(float), num, fp);
+ if(l.adam){
+ fwrite(l.m, sizeof(float), num, fp);
+ fwrite(l.v, sizeof(float), num, fp);
+ }
+}
+
+void save_batchnorm_weights(layer l, FILE *fp)
+{ // Writes l.c floats each of scales, rolling_mean and rolling_variance, in that order.
+#ifdef GPU
+ if(gpu_index >= 0){
+ pull_batchnorm_layer(l);
+ }
+#endif
+ fwrite(l.scales, sizeof(float), l.c, fp);
+ fwrite(l.rolling_mean, sizeof(float), l.c, fp);
+ fwrite(l.rolling_variance, sizeof(float), l.c, fp);
+}
+
+void save_connected_weights(layer l, FILE *fp)
+{ // Writes biases (l.outputs floats), then weights (l.outputs*l.inputs floats), then the optional batch-norm statistics.
+#ifdef GPU
+ if(gpu_index >= 0){
+ pull_connected_layer(l);
+ }
+#endif
+ fwrite(l.biases, sizeof(float), l.outputs, fp);
+ fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp);
+ if (l.batch_normalize){ // unlike the convolutional writer, the batch-norm statistics come after the weights here
+ fwrite(l.scales, sizeof(float), l.outputs, fp);
+ fwrite(l.rolling_mean, sizeof(float), l.outputs, fp);
+ fwrite(l.rolling_variance, sizeof(float), l.outputs, fp);
+ }
+}
+
void save_weights_upto(network net, char *filename, int cutoff)
{
+#ifdef GPU
+ if(net.gpu_index >= 0){
+ cuda_set_device(net.gpu_index);
+ }
+#endif
fprintf(stderr, "Saving weights to %s\n", filename);
- FILE *fp = fopen(filename, "w");
+ FILE *fp = fopen(filename, "wb");
if(!fp) file_error(filename);
- fwrite(&net.learning_rate, sizeof(float), 1, fp);
- fwrite(&net.momentum, sizeof(float), 1, fp);
- fwrite(&net.decay, sizeof(float), 1, fp);
- fwrite(&net.seen, sizeof(int), 1, fp);
+ int major = 0;
+ int minor = 1;
+ int revision = 0;
+ fwrite(&major, sizeof(int), 1, fp);
+ fwrite(&minor, sizeof(int), 1, fp);
+ fwrite(&revision, sizeof(int), 1, fp);
+ fwrite(net.seen, sizeof(int), 1, fp);
int i;
for(i = 0; i < net.n && i < cutoff; ++i){
layer l = net.layers[i];
if(l.type == CONVOLUTIONAL){
+ save_convolutional_weights(l, fp);
+ } if(l.type == CONNECTED){
+ save_connected_weights(l, fp);
+ } if(l.type == BATCHNORM){
+ save_batchnorm_weights(l, fp);
+ } if(l.type == RNN){
+ save_connected_weights(*(l.input_layer), fp);
+ save_connected_weights(*(l.self_layer), fp);
+ save_connected_weights(*(l.output_layer), fp);
+ } if(l.type == GRU){
+ save_connected_weights(*(l.input_z_layer), fp);
+ save_connected_weights(*(l.input_r_layer), fp);
+ save_connected_weights(*(l.input_h_layer), fp);
+ save_connected_weights(*(l.state_z_layer), fp);
+ save_connected_weights(*(l.state_r_layer), fp);
+ save_connected_weights(*(l.state_h_layer), fp);
+ } if(l.type == CRNN){
+ save_convolutional_weights(*(l.input_layer), fp);
+ save_convolutional_weights(*(l.self_layer), fp);
+ save_convolutional_weights(*(l.output_layer), fp);
+ } if(l.type == LOCAL){
#ifdef GPU
if(gpu_index >= 0){
- pull_convolutional_layer(l);
+ pull_local_layer(l);
}
#endif
- int num = l.n*l.c*l.size*l.size;
- fwrite(l.biases, sizeof(float), l.n, fp);
- fwrite(l.filters, sizeof(float), num, fp);
- }
- if(l.type == DECONVOLUTIONAL){
-#ifdef GPU
- if(gpu_index >= 0){
- pull_deconvolutional_layer(l);
- }
-#endif
- int num = l.n*l.c*l.size*l.size;
- fwrite(l.biases, sizeof(float), l.n, fp);
- fwrite(l.filters, sizeof(float), num, fp);
- }
- if(l.type == CONNECTED){
-#ifdef GPU
- if(gpu_index >= 0){
- pull_connected_layer(l);
- }
-#endif
+ int locations = l.out_w*l.out_h;
+ int size = l.size*l.size*l.c*l.n*locations;
fwrite(l.biases, sizeof(float), l.outputs, fp);
- fwrite(l.weights, sizeof(float), l.outputs*l.inputs, fp);
+ fwrite(l.weights, sizeof(float), size, fp);
}
}
fclose(fp);
@@ -551,48 +999,200 @@
save_weights_upto(net, filename, net.n);
}
+// Transpose, in place, the row-major rows x cols matrix stored in a.
+// On return a holds the cols x rows transpose: the element that was at
+// a[x*cols + y] is now at a[y*rows + x].
+//   a    - buffer of at least rows*cols floats; overwritten.
+//   rows - number of rows in the input layout.
+//   cols - number of columns in the input layout.
+// A NULL buffer or a non-positive dimension is a no-op. If the scratch
+// buffer cannot be allocated (or its size would overflow size_t) an error
+// is printed to stderr and the process exits, instead of writing through a
+// NULL pointer.
+void transpose_matrix(float *a, int rows, int cols)
+{
+    if(!a || rows <= 0 || cols <= 0) return;
+    // Do the size arithmetic in size_t: rows*cols can overflow int.
+    const size_t n_rows = (size_t)rows;
+    const size_t n_cols = (size_t)cols;
+    float *transpose = 0;
+    if(n_cols <= ((size_t)-1)/sizeof(float)/n_rows){
+        transpose = calloc(n_rows*n_cols, sizeof(float));
+    }
+    if(!transpose){
+        fprintf(stderr, "transpose_matrix: cannot allocate %d x %d floats\n", rows, cols);
+        exit(EXIT_FAILURE);
+    }
+    size_t x, y;
+    for(x = 0; x < n_rows; ++x){
+        for(y = 0; y < n_cols; ++y){
+            transpose[y*n_rows + x] = a[x*n_cols + y];
+        }
+    }
+    memcpy(a, transpose, n_rows*n_cols*sizeof(float));
+    free(transpose);
+}
+
+// Read connected layer l from fp as raw floats: l.outputs biases, then
+// l.outputs*l.inputs weights. A non-zero transpose runs
+// transpose_matrix(l.weights, l.inputs, l.outputs) on the weights just read.
+// The l.outputs scales, rolling means and rolling variances are read next,
+// but only when l.batch_normalize is set and l.dontloadscales is not.
+// fread() results are not checked, so a truncated file goes unnoticed.
+void load_connected_weights(layer l, FILE *fp, int transpose)
+{
+    const size_t elem = sizeof(float);
+    const int load_scales = l.batch_normalize && !l.dontloadscales;
+    fread(l.biases, elem, l.outputs, fp);
+    fread(l.weights, elem, l.outputs*l.inputs, fp);
+    if(transpose) transpose_matrix(l.weights, l.inputs, l.outputs);
+    if(load_scales){
+        fread(l.scales, elem, l.outputs, fp);
+        fread(l.rolling_mean, elem, l.outputs, fp);
+        fread(l.rolling_variance, elem, l.outputs, fp);
+    }
+#ifdef GPU
+    // GPU build with a device selected: push_connected_layer() runs after reading.
+    if(gpu_index >= 0) push_connected_layer(l);
+#endif
+}
+
+// Read batchnorm layer l from fp as raw floats: l.c scales, then l.c rolling
+// means, then l.c rolling variances. fread() results are not checked.
+void load_batchnorm_weights(layer l, FILE *fp)
+{
+    // Array order here is the on-disk order.
+    void *stats[] = { l.scales, l.rolling_mean, l.rolling_variance };
+    int s;
+    for(s = 0; s < 3; ++s){
+        fread(stats[s], sizeof(float), l.c, fp);
+    }
+#ifdef GPU
+    // GPU build with a device selected: push_batchnorm_layer() runs after reading.
+    if(gpu_index >= 0) push_batchnorm_layer(l);
+#endif
+}
+
+// Read a convolutional layer stored in packed one-bit-per-weight form.
+// Stream layout consumed: l.n float biases; if l.batch_normalize is set and
+// l.dontloadscales is not, l.n scales, rolling means and rolling variances;
+// then for each of the l.n filters one float followed by
+// (l.c*l.size*l.size)/8 bytes. Bit k of each byte selects +value (bit set)
+// or -value (bit clear) for the corresponding weight.
+// NOTE(review): only whole bytes are read, so when l.c*l.size*l.size is not
+// a multiple of 8 the last (size % 8) weights of every filter are left
+// untouched - confirm this matches the writer.
+// fread() results are not checked.
+void load_convolutional_weights_binary(layer l, FILE *fp)
+{
+    const size_t elem = sizeof(float);
+    fread(l.biases, elem, l.n, fp);
+    if(l.batch_normalize && !l.dontloadscales){
+        fread(l.scales, elem, l.n, fp);
+        fread(l.rolling_mean, elem, l.n, fp);
+        fread(l.rolling_variance, elem, l.n, fp);
+    }
+    const int per_filter = l.c*l.size*l.size;
+    const int whole_bytes = per_filter/8;
+    int f, b, bit;
+    for(f = 0; f < l.n; ++f){
+        // One value per filter; every weight becomes +value or -value.
+        float value = 0;
+        fread(&value, elem, 1, fp);
+        for(b = 0; b < whole_bytes; ++b){
+            const int base = f*per_filter + b*8;
+            unsigned char packed = 0;
+            fread(&packed, sizeof(char), 1, fp);
+            for(bit = 0; bit < 8; ++bit){
+                // Bounds guard; cannot fire while b < per_filter/8.
+                if(b*8 + bit >= per_filter) break;
+                if(packed & (1 << bit)){
+                    l.weights[base + bit] = value;
+                } else {
+                    l.weights[base + bit] = -value;
+                }
+            }
+        }
+    }
+#ifdef GPU
+    // GPU build with a device selected: push_convolutional_layer() runs after reading.
+    if(gpu_index >= 0) push_convolutional_layer(l);
+#endif
+}
+
+// Read convolutional layer l from fp as raw floats.
+// Order consumed: l.n biases; if l.batch_normalize is set and
+// l.dontloadscales is not, l.n scales, rolling means and rolling variances;
+// then l.n*l.c*l.size*l.size weights; and, if l.adam, the l.m and l.v
+// arrays (same element count as the weights).
+// When l.flipped is set the weights are passed through
+// transpose_matrix(l.weights, l.c*l.size*l.size, l.n) after reading.
+// l.binary gets no special handling here. fread() results are not checked.
+void load_convolutional_weights(layer l, FILE *fp)
+{
+    const size_t elem = sizeof(float);
+    const int per_filter = l.c*l.size*l.size;
+    const int weight_count = l.n*l.c*l.size*l.size;
+    fread(l.biases, elem, l.n, fp);
+    if(l.batch_normalize && !l.dontloadscales){
+        fread(l.scales, elem, l.n, fp);
+        fread(l.rolling_mean, elem, l.n, fp);
+        fread(l.rolling_variance, elem, l.n, fp);
+    }
+    fread(l.weights, elem, weight_count, fp);
+    if(l.adam){
+        fread(l.m, elem, weight_count, fp);
+        fread(l.v, elem, weight_count, fp);
+    }
+    if(l.flipped) transpose_matrix(l.weights, per_filter, l.n);
+#ifdef GPU
+    // GPU build with a device selected: push_convolutional_layer() runs after reading.
+    if(gpu_index >= 0) push_convolutional_layer(l);
+#endif
+}
+
+
void load_weights_upto(network *net, char *filename, int cutoff)
{
+#ifdef GPU
+ if(net->gpu_index >= 0){
+ cuda_set_device(net->gpu_index);
+ }
+#endif
fprintf(stderr, "Loading weights from %s...", filename);
fflush(stdout);
- FILE *fp = fopen(filename, "r");
+ FILE *fp = fopen(filename, "rb");
if(!fp) file_error(filename);
- fread(&net->learning_rate, sizeof(float), 1, fp);
- fread(&net->momentum, sizeof(float), 1, fp);
- fread(&net->decay, sizeof(float), 1, fp);
- fread(&net->seen, sizeof(int), 1, fp);
+ int major;
+ int minor;
+ int revision;
+ fread(&major, sizeof(int), 1, fp);
+ fread(&minor, sizeof(int), 1, fp);
+ fread(&revision, sizeof(int), 1, fp);
+ if ((major * 10 + minor) >= 2) {
+ printf("\n seen 64 \n");
+ uint64_t iseen = 0;
+ fread(&iseen, sizeof(uint64_t), 1, fp);
+ *net->seen = iseen;
+ }
+ else {
+ printf("\n seen 32 \n");
+ fread(net->seen, sizeof(int), 1, fp);
+ }
+ int transpose = (major > 1000) || (minor > 1000);
int i;
for(i = 0; i < net->n && i < cutoff; ++i){
layer l = net->layers[i];
if (l.dontload) continue;
if(l.type == CONVOLUTIONAL){
- int num = l.n*l.c*l.size*l.size;
- fread(l.biases, sizeof(float), l.n, fp);
- fread(l.filters, sizeof(float), num, fp);
-#ifdef GPU
- if(gpu_index >= 0){
- push_convolutional_layer(l);
- }
-#endif
- }
- if(l.type == DECONVOLUTIONAL){
- int num = l.n*l.c*l.size*l.size;
- fread(l.biases, sizeof(float), l.n, fp);
- fread(l.filters, sizeof(float), num, fp);
-#ifdef GPU
- if(gpu_index >= 0){
- push_deconvolutional_layer(l);
- }
-#endif
+ load_convolutional_weights(l, fp);
}
if(l.type == CONNECTED){
+ load_connected_weights(l, fp, transpose);
+ }
+ if(l.type == BATCHNORM){
+ load_batchnorm_weights(l, fp);
+ }
+ if(l.type == CRNN){
+ load_convolutional_weights(*(l.input_layer), fp);
+ load_convolutional_weights(*(l.self_layer), fp);
+ load_convolutional_weights(*(l.output_layer), fp);
+ }
+ if(l.type == RNN){
+ load_connected_weights(*(l.input_layer), fp, transpose);
+ load_connected_weights(*(l.self_layer), fp, transpose);
+ load_connected_weights(*(l.output_layer), fp, transpose);
+ }
+ if(l.type == GRU){
+ load_connected_weights(*(l.input_z_layer), fp, transpose);
+ load_connected_weights(*(l.input_r_layer), fp, transpose);
+ load_connected_weights(*(l.input_h_layer), fp, transpose);
+ load_connected_weights(*(l.state_z_layer), fp, transpose);
+ load_connected_weights(*(l.state_r_layer), fp, transpose);
+ load_connected_weights(*(l.state_h_layer), fp, transpose);
+ }
+ if(l.type == LOCAL){
+ int locations = l.out_w*l.out_h;
+ int size = l.size*l.size*l.c*l.n*locations;
fread(l.biases, sizeof(float), l.outputs, fp);
- fread(l.weights, sizeof(float), l.outputs*l.inputs, fp);
+ fread(l.weights, sizeof(float), size, fp);
#ifdef GPU
if(gpu_index >= 0){
- push_connected_layer(l);
+ push_local_layer(l);
}
#endif
}
--
Gitblit v1.10.0