From 028696bf15efeca3acb3db8c42a96f7b9e0f55ff Mon Sep 17 00:00:00 2001
From: iovodov <b@ovdv.ru>
Date: Thu, 03 May 2018 13:33:46 +0000
Subject: [PATCH] Output improvements for detector results: Previously, detector results were printed in random order, which made them hard to interpret. Now: 1. Text output includes the coordinates of each rect (left, right, top, bottom, in pixels) along with its label and score. 2. Text output is sorted by the rects' left edges to make it easier to find the corresponding rects in the image. 3. If several class probabilities exceed the threshold for a detection, the most probable class is written first and the coordinates are not repeated for the others. 4. Rects are drawn on the image in order of their best class probability, so the most probable rects are always on top and are not overlaid by less probable ones. 5. The most probable label for a rect is always written first. Also: 6. The message about low GPU memory now includes the required amount.
---
src/parser.c | 700 +++++++++++++++++++++++++++++++++------------------------
1 files changed, 407 insertions(+), 293 deletions(-)
diff --git a/src/parser.c b/src/parser.c
index 6c88fd5..7441ae2 100644
--- a/src/parser.c
+++ b/src/parser.c
@@ -2,58 +2,82 @@
#include <string.h>
#include <stdlib.h>
-#include "parser.h"
-#include "activations.h"
-#include "crop_layer.h"
-#include "cost_layer.h"
-#include "convolutional_layer.h"
#include "activation_layer.h"
-#include "normalization_layer.h"
-#include "batchnorm_layer.h"
-#include "deconvolutional_layer.h"
-#include "connected_layer.h"
-#include "rnn_layer.h"
-#include "gru_layer.h"
-#include "crnn_layer.h"
-#include "maxpool_layer.h"
-#include "softmax_layer.h"
-#include "dropout_layer.h"
-#include "detection_layer.h"
+#include "activations.h"
+#include "assert.h"
#include "avgpool_layer.h"
+#include "batchnorm_layer.h"
+#include "blas.h"
+#include "connected_layer.h"
+#include "convolutional_layer.h"
+#include "cost_layer.h"
+#include "crnn_layer.h"
+#include "crop_layer.h"
+#include "detection_layer.h"
+#include "dropout_layer.h"
+#include "gru_layer.h"
+#include "list.h"
#include "local_layer.h"
+#include "maxpool_layer.h"
+#include "normalization_layer.h"
+#include "option_list.h"
+#include "parser.h"
+#include "region_layer.h"
+#include "reorg_layer.h"
+#include "reorg_old_layer.h"
+#include "rnn_layer.h"
#include "route_layer.h"
#include "shortcut_layer.h"
-#include "list.h"
-#include "option_list.h"
+#include "softmax_layer.h"
#include "utils.h"
+#include "upsample_layer.h"
+#include "yolo_layer.h"
+#include <stdint.h>
typedef struct{
char *type;
list *options;
}section;
-int is_network(section *s);
-int is_convolutional(section *s);
-int is_activation(section *s);
-int is_local(section *s);
-int is_deconvolutional(section *s);
-int is_connected(section *s);
-int is_rnn(section *s);
-int is_gru(section *s);
-int is_crnn(section *s);
-int is_maxpool(section *s);
-int is_avgpool(section *s);
-int is_dropout(section *s);
-int is_softmax(section *s);
-int is_normalization(section *s);
-int is_batchnorm(section *s);
-int is_crop(section *s);
-int is_shortcut(section *s);
-int is_cost(section *s);
-int is_detection(section *s);
-int is_route(section *s);
list *read_cfg(char *filename);
+LAYER_TYPE string_to_layer_type(char * type)
+{
+
+ if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
+ if (strcmp(type, "[crop]")==0) return CROP;
+ if (strcmp(type, "[cost]")==0) return COST;
+ if (strcmp(type, "[detection]")==0) return DETECTION;
+ if (strcmp(type, "[region]")==0) return REGION;
+ if (strcmp(type, "[yolo]") == 0) return YOLO;
+ if (strcmp(type, "[local]")==0) return LOCAL;
+ if (strcmp(type, "[conv]")==0
+ || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
+ if (strcmp(type, "[activation]")==0) return ACTIVE;
+ if (strcmp(type, "[net]")==0
+ || strcmp(type, "[network]")==0) return NETWORK;
+ if (strcmp(type, "[crnn]")==0) return CRNN;
+ if (strcmp(type, "[gru]")==0) return GRU;
+ if (strcmp(type, "[rnn]")==0) return RNN;
+ if (strcmp(type, "[conn]")==0
+ || strcmp(type, "[connected]")==0) return CONNECTED;
+ if (strcmp(type, "[max]")==0
+ || strcmp(type, "[maxpool]")==0) return MAXPOOL;
+ if (strcmp(type, "[reorg]")==0) return REORG;
+ if (strcmp(type, "[reorg_old]") == 0) return REORG_OLD;
+ if (strcmp(type, "[avg]")==0
+ || strcmp(type, "[avgpool]")==0) return AVGPOOL;
+ if (strcmp(type, "[dropout]")==0) return DROPOUT;
+ if (strcmp(type, "[lrn]")==0
+ || strcmp(type, "[normalization]")==0) return NORMALIZATION;
+ if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
+ if (strcmp(type, "[soft]")==0
+ || strcmp(type, "[softmax]")==0) return SOFTMAX;
+ if (strcmp(type, "[route]")==0) return ROUTE;
+ if (strcmp(type, "[upsample]") == 0) return UPSAMPLE;
+ return BLANK;
+}
+
void free_section(section *s)
{
free(s->type);
@@ -94,35 +118,9 @@
int c;
int index;
int time_steps;
+ network net;
} size_params;
-deconvolutional_layer parse_deconvolutional(list *options, size_params params)
-{
- int n = option_find_int(options, "filters",1);
- int size = option_find_int(options, "size",1);
- int stride = option_find_int(options, "stride",1);
- char *activation_s = option_find_str(options, "activation", "logistic");
- ACTIVATION activation = get_activation(activation_s);
-
- int batch,h,w,c;
- h = params.h;
- w = params.w;
- c = params.c;
- batch=params.batch;
- if(!(h && w && c)) error("Layer before deconvolutional layer must output image.");
-
- deconvolutional_layer layer = make_deconvolutional_layer(batch,h,w,c,n,size,stride,activation);
-
- char *weights = option_find_str(options, "weights", 0);
- char *biases = option_find_str(options, "biases", 0);
- parse_data(weights, layer.filters, c*n*size*size);
- parse_data(biases, layer.biases, n);
- #ifdef GPU
- if(weights || biases) push_deconvolutional_layer(layer);
- #endif
- return layer;
-}
-
local_layer parse_local(list *options, size_params params)
{
int n = option_find_int(options, "filters",1);
@@ -149,7 +147,10 @@
int n = option_find_int(options, "filters",1);
int size = option_find_int(options, "size",1);
int stride = option_find_int(options, "stride",1);
- int pad = option_find_int(options, "pad",0);
+ int pad = option_find_int_quiet(options, "pad",0);
+ int padding = option_find_int_quiet(options, "padding",0);
+ if(pad) padding = size/2;
+
char *activation_s = option_find_str(options, "activation", "logistic");
ACTIVATION activation = get_activation(activation_s);
@@ -163,17 +164,15 @@
int binary = option_find_int_quiet(options, "binary", 0);
int xnor = option_find_int_quiet(options, "xnor", 0);
- convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,pad,activation, batch_normalize, binary, xnor);
+ convolutional_layer layer = make_convolutional_layer(batch,h,w,c,n,size,stride,padding,activation, batch_normalize, binary, xnor, params.net.adam);
layer.flipped = option_find_int_quiet(options, "flipped", 0);
layer.dot = option_find_float_quiet(options, "dot", 0);
+ if(params.net.adam){
+ layer.B1 = params.net.B1;
+ layer.B2 = params.net.B2;
+ layer.eps = params.net.eps;
+ }
- char *weights = option_find_str(options, "weights", 0);
- char *biases = option_find_str(options, "biases", 0);
- parse_data(weights, layer.filters, c*n*size*size);
- parse_data(biases, layer.biases, n);
- #ifdef GPU
- if(weights || biases) push_convolutional_layer(layer);
- #endif
return layer;
}
@@ -227,13 +226,6 @@
connected_layer layer = make_connected_layer(params.batch, params.inputs, output, activation, batch_normalize);
- char *weights = option_find_str(options, "weights", 0);
- char *biases = option_find_str(options, "biases", 0);
- parse_data(biases, layer.biases, output);
- parse_data(weights, layer.weights, params.inputs*output);
- #ifdef GPU
- if(weights || biases) push_connected_layer(layer);
- #endif
return layer;
}
@@ -242,9 +234,124 @@
int groups = option_find_int_quiet(options, "groups",1);
softmax_layer layer = make_softmax_layer(params.batch, params.inputs, groups);
layer.temperature = option_find_float_quiet(options, "temperature", 1);
+ char *tree_file = option_find_str(options, "tree", 0);
+ if (tree_file) layer.softmax_tree = read_tree(tree_file);
return layer;
}
+int *parse_yolo_mask(char *a, int *num)
+{
+ int *mask = 0;
+ if (a) {
+ int len = strlen(a);
+ int n = 1;
+ int i;
+ for (i = 0; i < len; ++i) {
+ if (a[i] == ',') ++n;
+ }
+ mask = calloc(n, sizeof(int));
+ for (i = 0; i < n; ++i) {
+ int val = atoi(a);
+ mask[i] = val;
+ a = strchr(a, ',') + 1;
+ }
+ *num = n;
+ }
+ return mask;
+}
+
+layer parse_yolo(list *options, size_params params)
+{
+ int classes = option_find_int(options, "classes", 20);
+ int total = option_find_int(options, "num", 1);
+ int num = total;
+
+ char *a = option_find_str(options, "mask", 0);
+ int *mask = parse_yolo_mask(a, &num);
+ int max_boxes = option_find_int_quiet(options, "max", 30);
+ layer l = make_yolo_layer(params.batch, params.w, params.h, num, total, mask, classes, max_boxes);
+ assert(l.outputs == params.inputs);
+
+ //l.max_boxes = option_find_int_quiet(options, "max", 90);
+ l.jitter = option_find_float(options, "jitter", .2);
+ l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
+
+ l.ignore_thresh = option_find_float(options, "ignore_thresh", .5);
+ l.truth_thresh = option_find_float(options, "truth_thresh", 1);
+ l.random = option_find_int_quiet(options, "random", 0);
+
+ char *map_file = option_find_str(options, "map", 0);
+ if (map_file) l.map = read_map(map_file);
+
+ a = option_find_str(options, "anchors", 0);
+ if (a) {
+ int len = strlen(a);
+ int n = 1;
+ int i;
+ for (i = 0; i < len; ++i) {
+ if (a[i] == ',') ++n;
+ }
+ for (i = 0; i < n && i < total*2; ++i) {
+ float bias = atof(a);
+ l.biases[i] = bias;
+ a = strchr(a, ',') + 1;
+ }
+ }
+ return l;
+}
+
+layer parse_region(list *options, size_params params)
+{
+ int coords = option_find_int(options, "coords", 4);
+ int classes = option_find_int(options, "classes", 20);
+ int num = option_find_int(options, "num", 1);
+ int max_boxes = option_find_int_quiet(options, "max", 30);
+
+ layer l = make_region_layer(params.batch, params.w, params.h, num, classes, coords, max_boxes);
+ assert(l.outputs == params.inputs);
+
+ l.log = option_find_int_quiet(options, "log", 0);
+ l.sqrt = option_find_int_quiet(options, "sqrt", 0);
+
+ l.softmax = option_find_int(options, "softmax", 0);
+ l.focal_loss = option_find_int_quiet(options, "focal_loss", 0);
+ //l.max_boxes = option_find_int_quiet(options, "max",30);
+ l.jitter = option_find_float(options, "jitter", .2);
+ l.rescore = option_find_int_quiet(options, "rescore",0);
+
+ l.thresh = option_find_float(options, "thresh", .5);
+ l.classfix = option_find_int_quiet(options, "classfix", 0);
+ l.absolute = option_find_int_quiet(options, "absolute", 0);
+ l.random = option_find_int_quiet(options, "random", 0);
+
+ l.coord_scale = option_find_float(options, "coord_scale", 1);
+ l.object_scale = option_find_float(options, "object_scale", 1);
+ l.noobject_scale = option_find_float(options, "noobject_scale", 1);
+ l.mask_scale = option_find_float(options, "mask_scale", 1);
+ l.class_scale = option_find_float(options, "class_scale", 1);
+ l.bias_match = option_find_int_quiet(options, "bias_match",0);
+
+ char *tree_file = option_find_str(options, "tree", 0);
+ if (tree_file) l.softmax_tree = read_tree(tree_file);
+ char *map_file = option_find_str(options, "map", 0);
+ if (map_file) l.map = read_map(map_file);
+
+ char *a = option_find_str(options, "anchors", 0);
+ if(a){
+ int len = strlen(a);
+ int n = 1;
+ int i;
+ for(i = 0; i < len; ++i){
+ if (a[i] == ',') ++n;
+ }
+ for(i = 0; i < n && i < num*2; ++i){
+ float bias = atof(a);
+ l.biases[i] = bias;
+ a = strchr(a, ',')+1;
+ }
+ }
+ return l;
+}
detection_layer parse_detection(list *options, size_params params)
{
int coords = option_find_int(options, "coords", 1);
@@ -257,12 +364,15 @@
layer.softmax = option_find_int(options, "softmax", 0);
layer.sqrt = option_find_int(options, "sqrt", 0);
+ layer.max_boxes = option_find_int_quiet(options, "max",30);
layer.coord_scale = option_find_float(options, "coord_scale", 1);
layer.forced = option_find_int(options, "forced", 0);
layer.object_scale = option_find_float(options, "object_scale", 1);
layer.noobject_scale = option_find_float(options, "noobject_scale", 1);
layer.class_scale = option_find_float(options, "class_scale", 1);
layer.jitter = option_find_float(options, "jitter", .2);
+ layer.random = option_find_int_quiet(options, "random", 0);
+ layer.reorg = option_find_int_quiet(options, "reorg", 0);
return layer;
}
@@ -272,6 +382,7 @@
COST_TYPE type = get_cost_type(type_s);
float scale = option_find_float_quiet(options, "scale",1);
cost_layer layer = make_cost_layer(params.batch, params.inputs, type, scale);
+ layer.ratio = option_find_float_quiet(options, "ratio",0);
return layer;
}
@@ -299,10 +410,44 @@
return l;
}
+layer parse_reorg(list *options, size_params params)
+{
+ int stride = option_find_int(options, "stride",1);
+ int reverse = option_find_int_quiet(options, "reverse",0);
+
+ int batch,h,w,c;
+ h = params.h;
+ w = params.w;
+ c = params.c;
+ batch=params.batch;
+ if(!(h && w && c)) error("Layer before reorg layer must output image.");
+
+ layer layer = make_reorg_layer(batch,w,h,c,stride,reverse);
+ return layer;
+}
+
+layer parse_reorg_old(list *options, size_params params)
+{
+ printf("\n reorg_old \n");
+ int stride = option_find_int(options, "stride", 1);
+ int reverse = option_find_int_quiet(options, "reverse", 0);
+
+ int batch, h, w, c;
+ h = params.h;
+ w = params.w;
+ c = params.c;
+ batch = params.batch;
+ if (!(h && w && c)) error("Layer before reorg layer must output image.");
+
+ layer layer = make_reorg_old_layer(batch, w, h, c, stride, reverse);
+ return layer;
+}
+
maxpool_layer parse_maxpool(list *options, size_params params)
{
int stride = option_find_int(options, "stride",1);
int size = option_find_int(options, "size",stride);
+ int padding = option_find_int_quiet(options, "padding", (size-1)/2);
int batch,h,w,c;
h = params.h;
@@ -311,7 +456,7 @@
batch=params.batch;
if(!(h && w && c)) error("Layer before maxpool layer must output image.");
- maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride);
+ maxpool_layer layer = make_maxpool_layer(batch,h,w,c,size,stride,padding);
return layer;
}
@@ -389,6 +534,15 @@
return l;
}
+layer parse_upsample(list *options, size_params params, network net)
+{
+
+ int stride = option_find_int(options, "stride", 2);
+ layer l = make_upsample_layer(params.batch, params.w, params.h, params.c, stride);
+ l.scale = option_find_float_quiet(options, "scale", 1);
+ return l;
+}
+
route_layer parse_route(list *options, size_params params, network net)
{
char *l = option_find(options, "layers");
@@ -432,6 +586,7 @@
learning_rate_policy get_policy(char *s)
{
+ if (strcmp(s, "random")==0) return RANDOM;
if (strcmp(s, "poly")==0) return POLY;
if (strcmp(s, "constant")==0) return CONSTANT;
if (strcmp(s, "step")==0) return STEP;
@@ -454,17 +609,37 @@
net->batch *= net->time_steps;
net->subdivisions = subdivs;
+ net->adam = option_find_int_quiet(options, "adam", 0);
+ if(net->adam){
+ net->B1 = option_find_float(options, "B1", .9);
+ net->B2 = option_find_float(options, "B2", .999);
+ net->eps = option_find_float(options, "eps", .000001);
+ }
+
net->h = option_find_int_quiet(options, "height",0);
net->w = option_find_int_quiet(options, "width",0);
net->c = option_find_int_quiet(options, "channels",0);
net->inputs = option_find_int_quiet(options, "inputs", net->h * net->w * net->c);
net->max_crop = option_find_int_quiet(options, "max_crop",net->w*2);
net->min_crop = option_find_int_quiet(options, "min_crop",net->w);
+ net->flip = option_find_int_quiet(options, "flip", 1);
+
+ net->small_object = option_find_int_quiet(options, "small_object", 0);
+ net->angle = option_find_float_quiet(options, "angle", 0);
+ net->aspect = option_find_float_quiet(options, "aspect", 1);
+ net->saturation = option_find_float_quiet(options, "saturation", 1);
+ net->exposure = option_find_float_quiet(options, "exposure", 1);
+ net->hue = option_find_float_quiet(options, "hue", 0);
+ net->power = option_find_float_quiet(options, "power", 4);
if(!net->inputs && !(net->h && net->w && net->c)) error("No input parameters supplied");
char *policy_s = option_find_str(options, "policy", "constant");
net->policy = get_policy(policy_s);
+ net->burn_in = option_find_int_quiet(options, "burn_in", 0);
+#ifdef CUDNN_HALF
+ net->burn_in = 0;
+#endif
if(net->policy == STEP){
net->step = option_find_int(options, "step", 1);
net->scale = option_find_float(options, "scale", 1);
@@ -497,18 +672,30 @@
} else if (net->policy == SIG){
net->gamma = option_find_float(options, "gamma", 1);
net->step = option_find_int(options, "step", 1);
- } else if (net->policy == POLY){
- net->power = option_find_float(options, "power", 1);
+ } else if (net->policy == POLY || net->policy == RANDOM){
+ //net->power = option_find_float(options, "power", 1);
}
net->max_batches = option_find_int(options, "max_batches", 0);
}
+int is_network(section *s)
+{
+ return (strcmp(s->type, "[net]")==0
+ || strcmp(s->type, "[network]")==0);
+}
+
network parse_network_cfg(char *filename)
{
+ return parse_network_cfg_custom(filename, 0);
+}
+
+network parse_network_cfg_custom(char *filename, int batch)
+{
list *sections = read_cfg(filename);
node *n = sections->front;
if(!n) error("Config file has no sections");
network net = make_network(sections->size - 1);
+ net.gpu_index = gpu_index;
size_params params;
section *s = (section *)n->val;
@@ -520,55 +707,69 @@
params.w = net.w;
params.c = net.c;
params.inputs = net.inputs;
+ if (batch > 0) net.batch = batch;
params.batch = net.batch;
params.time_steps = net.time_steps;
+ params.net = net;
+ size_t workspace_size = 0;
n = n->next;
int count = 0;
free_section(s);
+ fprintf(stderr, "layer filters size input output\n");
while(n){
params.index = count;
- fprintf(stderr, "%d: ", count);
+ fprintf(stderr, "%5d ", count);
s = (section *)n->val;
options = s->options;
layer l = {0};
- if(is_convolutional(s)){
+ LAYER_TYPE lt = string_to_layer_type(s->type);
+ if(lt == CONVOLUTIONAL){
l = parse_convolutional(options, params);
- }else if(is_local(s)){
+ }else if(lt == LOCAL){
l = parse_local(options, params);
- }else if(is_activation(s)){
+ }else if(lt == ACTIVE){
l = parse_activation(options, params);
- }else if(is_deconvolutional(s)){
- l = parse_deconvolutional(options, params);
- }else if(is_rnn(s)){
+ }else if(lt == RNN){
l = parse_rnn(options, params);
- }else if(is_gru(s)){
+ }else if(lt == GRU){
l = parse_gru(options, params);
- }else if(is_crnn(s)){
+ }else if(lt == CRNN){
l = parse_crnn(options, params);
- }else if(is_connected(s)){
+ }else if(lt == CONNECTED){
l = parse_connected(options, params);
- }else if(is_crop(s)){
+ }else if(lt == CROP){
l = parse_crop(options, params);
- }else if(is_cost(s)){
+ }else if(lt == COST){
l = parse_cost(options, params);
- }else if(is_detection(s)){
+ }else if(lt == REGION){
+ l = parse_region(options, params);
+ }else if (lt == YOLO) {
+ l = parse_yolo(options, params);
+ }else if(lt == DETECTION){
l = parse_detection(options, params);
- }else if(is_softmax(s)){
+ }else if(lt == SOFTMAX){
l = parse_softmax(options, params);
- }else if(is_normalization(s)){
+ net.hierarchy = l.softmax_tree;
+ }else if(lt == NORMALIZATION){
l = parse_normalization(options, params);
- }else if(is_batchnorm(s)){
+ }else if(lt == BATCHNORM){
l = parse_batchnorm(options, params);
- }else if(is_maxpool(s)){
+ }else if(lt == MAXPOOL){
l = parse_maxpool(options, params);
- }else if(is_avgpool(s)){
+ }else if(lt == REORG){
+ l = parse_reorg(options, params); }
+ else if (lt == REORG_OLD) {
+ l = parse_reorg_old(options, params);
+ }else if(lt == AVGPOOL){
l = parse_avgpool(options, params);
- }else if(is_route(s)){
+ }else if(lt == ROUTE){
l = parse_route(options, params, net);
- }else if(is_shortcut(s)){
+ }else if (lt == UPSAMPLE) {
+ l = parse_upsample(options, params, net);
+ }else if(lt == SHORTCUT){
l = parse_shortcut(options, params, net);
- }else if(is_dropout(s)){
+ }else if(lt == DROPOUT){
l = parse_dropout(options, params);
l.output = net.layers[count-1].output;
l.delta = net.layers[count-1].delta;
@@ -579,10 +780,13 @@
}else{
fprintf(stderr, "Type not recognized: %s\n", s->type);
}
+ l.onlyforward = option_find_int_quiet(options, "onlyforward", 0);
+ l.stopbackward = option_find_int_quiet(options, "stopbackward", 0);
l.dontload = option_find_int_quiet(options, "dontload", 0);
l.dontloadscales = option_find_int_quiet(options, "dontloadscales", 0);
option_unused(options);
net.layers[count] = l;
+ if (l.workspace_size > workspace_size) workspace_size = l.workspace_size;
free_section(s);
n = n->next;
++count;
@@ -596,134 +800,22 @@
free_list(sections);
net.outputs = get_network_output_size(net);
net.output = get_network_output(net);
+ if(workspace_size){
+ //printf("%ld\n", workspace_size);
+#ifdef GPU
+ if(gpu_index >= 0){
+ net.workspace = cuda_make_array(0, (workspace_size-1)/sizeof(float)+1);
+ }else {
+ net.workspace = calloc(1, workspace_size);
+ }
+#else
+ net.workspace = calloc(1, workspace_size);
+#endif
+ }
return net;
}
-LAYER_TYPE string_to_layer_type(char * type)
-{
- if (strcmp(type, "[shortcut]")==0) return SHORTCUT;
- if (strcmp(type, "[crop]")==0) return CROP;
- if (strcmp(type, "[cost]")==0) return COST;
- if (strcmp(type, "[detection]")==0) return DETECTION;
- if (strcmp(type, "[local]")==0) return LOCAL;
- if (strcmp(type, "[deconv]")==0
- || strcmp(type, "[deconvolutional]")==0) return DECONVOLUTIONAL;
- if (strcmp(type, "[conv]")==0
- || strcmp(type, "[convolutional]")==0) return CONVOLUTIONAL;
- if (strcmp(type, "[activation]")==0) return ACTIVE;
- if (strcmp(type, "[net]")==0
- || strcmp(type, "[network]")==0) return NETWORK;
- if (strcmp(type, "[crnn]")==0) return CRNN;
- if (strcmp(type, "[gru]")==0) return GRU;
- if (strcmp(type, "[rnn]")==0) return RNN;
- if (strcmp(type, "[conn]")==0
- || strcmp(type, "[connected]")==0) return CONNECTED;
- if (strcmp(type, "[max]")==0
- || strcmp(type, "[maxpool]")==0) return MAXPOOL;
- if (strcmp(type, "[avg]")==0
- || strcmp(type, "[avgpool]")==0) return AVGPOOL;
- if (strcmp(type, "[dropout]")==0) return DROPOUT;
- if (strcmp(type, "[lrn]")==0
- || strcmp(type, "[normalization]")==0) return NORMALIZATION;
- if (strcmp(type, "[batchnorm]")==0) return BATCHNORM;
- if (strcmp(type, "[soft]")==0
- || strcmp(type, "[softmax]")==0) return SOFTMAX;
- if (strcmp(type, "[route]")==0) return ROUTE;
- return BLANK;
-}
-
-int is_shortcut(section *s)
-{
- return (strcmp(s->type, "[shortcut]")==0);
-}
-int is_crop(section *s)
-{
- return (strcmp(s->type, "[crop]")==0);
-}
-int is_cost(section *s)
-{
- return (strcmp(s->type, "[cost]")==0);
-}
-int is_detection(section *s)
-{
- return (strcmp(s->type, "[detection]")==0);
-}
-int is_local(section *s)
-{
- return (strcmp(s->type, "[local]")==0);
-}
-int is_deconvolutional(section *s)
-{
- return (strcmp(s->type, "[deconv]")==0
- || strcmp(s->type, "[deconvolutional]")==0);
-}
-int is_convolutional(section *s)
-{
- return (strcmp(s->type, "[conv]")==0
- || strcmp(s->type, "[convolutional]")==0);
-}
-int is_activation(section *s)
-{
- return (strcmp(s->type, "[activation]")==0);
-}
-int is_network(section *s)
-{
- return (strcmp(s->type, "[net]")==0
- || strcmp(s->type, "[network]")==0);
-}
-int is_crnn(section *s)
-{
- return (strcmp(s->type, "[crnn]")==0);
-}
-int is_gru(section *s)
-{
- return (strcmp(s->type, "[gru]")==0);
-}
-int is_rnn(section *s)
-{
- return (strcmp(s->type, "[rnn]")==0);
-}
-int is_connected(section *s)
-{
- return (strcmp(s->type, "[conn]")==0
- || strcmp(s->type, "[connected]")==0);
-}
-int is_maxpool(section *s)
-{
- return (strcmp(s->type, "[max]")==0
- || strcmp(s->type, "[maxpool]")==0);
-}
-int is_avgpool(section *s)
-{
- return (strcmp(s->type, "[avg]")==0
- || strcmp(s->type, "[avgpool]")==0);
-}
-int is_dropout(section *s)
-{
- return (strcmp(s->type, "[dropout]")==0);
-}
-
-int is_normalization(section *s)
-{
- return (strcmp(s->type, "[lrn]")==0
- || strcmp(s->type, "[normalization]")==0);
-}
-
-int is_batchnorm(section *s)
-{
- return (strcmp(s->type, "[batchnorm]")==0);
-}
-
-int is_softmax(section *s)
-{
- return (strcmp(s->type, "[soft]")==0
- || strcmp(s->type, "[softmax]")==0);
-}
-int is_route(section *s)
-{
- return (strcmp(s->type, "[route]")==0);
-}
list *read_cfg(char *filename)
{
@@ -760,45 +852,6 @@
return sections;
}
-void save_weights_double(network net, char *filename)
-{
- fprintf(stderr, "Saving doubled weights to %s\n", filename);
- FILE *fp = fopen(filename, "w");
- if(!fp) file_error(filename);
-
- fwrite(&net.learning_rate, sizeof(float), 1, fp);
- fwrite(&net.momentum, sizeof(float), 1, fp);
- fwrite(&net.decay, sizeof(float), 1, fp);
- fwrite(net.seen, sizeof(int), 1, fp);
-
- int i,j,k;
- for(i = 0; i < net.n; ++i){
- layer l = net.layers[i];
- if(l.type == CONVOLUTIONAL){
-#ifdef GPU
- if(gpu_index >= 0){
- pull_convolutional_layer(l);
- }
-#endif
- float zero = 0;
- fwrite(l.biases, sizeof(float), l.n, fp);
- fwrite(l.biases, sizeof(float), l.n, fp);
-
- for (j = 0; j < l.n; ++j){
- int index = j*l.c*l.size*l.size;
- fwrite(l.filters+index, sizeof(float), l.c*l.size*l.size, fp);
- for (k = 0; k < l.c*l.size*l.size; ++k) fwrite(&zero, sizeof(float), 1, fp);
- }
- for (j = 0; j < l.n; ++j){
- int index = j*l.c*l.size*l.size;
- for (k = 0; k < l.c*l.size*l.size; ++k) fwrite(&zero, sizeof(float), 1, fp);
- fwrite(l.filters+index, sizeof(float), l.c*l.size*l.size, fp);
- }
- }
- }
- fclose(fp);
-}
-
void save_convolutional_weights_binary(layer l, FILE *fp)
{
#ifdef GPU
@@ -806,7 +859,7 @@
pull_convolutional_layer(l);
}
#endif
- binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.binary_filters);
+ binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.binary_weights);
int size = l.c*l.size*l.size;
int i, j, k;
fwrite(l.biases, sizeof(float), l.n, fp);
@@ -816,7 +869,7 @@
fwrite(l.rolling_variance, sizeof(float), l.n, fp);
}
for(i = 0; i < l.n; ++i){
- float mean = l.binary_filters[i*size];
+ float mean = l.binary_weights[i*size];
if(mean < 0) mean = -mean;
fwrite(&mean, sizeof(float), 1, fp);
for(j = 0; j < size/8; ++j){
@@ -824,7 +877,7 @@
unsigned char c = 0;
for(k = 0; k < 8; ++k){
if (j*8 + k >= size) break;
- if (l.binary_filters[index + k] > 0) c = (c | 1<<k);
+ if (l.binary_weights[index + k] > 0) c = (c | 1<<k);
}
fwrite(&c, sizeof(char), 1, fp);
}
@@ -849,7 +902,23 @@
fwrite(l.rolling_mean, sizeof(float), l.n, fp);
fwrite(l.rolling_variance, sizeof(float), l.n, fp);
}
- fwrite(l.filters, sizeof(float), num, fp);
+ fwrite(l.weights, sizeof(float), num, fp);
+ if(l.adam){
+ fwrite(l.m, sizeof(float), num, fp);
+ fwrite(l.v, sizeof(float), num, fp);
+ }
+}
+
+void save_batchnorm_weights(layer l, FILE *fp)
+{
+#ifdef GPU
+ if(gpu_index >= 0){
+ pull_batchnorm_layer(l);
+ }
+#endif
+ fwrite(l.scales, sizeof(float), l.c, fp);
+ fwrite(l.rolling_mean, sizeof(float), l.c, fp);
+ fwrite(l.rolling_variance, sizeof(float), l.c, fp);
}
void save_connected_weights(layer l, FILE *fp)
@@ -870,8 +939,13 @@
void save_weights_upto(network net, char *filename, int cutoff)
{
+#ifdef GPU
+ if(net.gpu_index >= 0){
+ cuda_set_device(net.gpu_index);
+ }
+#endif
fprintf(stderr, "Saving weights to %s\n", filename);
- FILE *fp = fopen(filename, "w");
+ FILE *fp = fopen(filename, "wb");
if(!fp) file_error(filename);
int major = 0;
@@ -889,6 +963,8 @@
save_convolutional_weights(l, fp);
} if(l.type == CONNECTED){
save_connected_weights(l, fp);
+ } if(l.type == BATCHNORM){
+ save_batchnorm_weights(l, fp);
} if(l.type == RNN){
save_connected_weights(*(l.input_layer), fp);
save_connected_weights(*(l.self_layer), fp);
@@ -913,7 +989,7 @@
int locations = l.out_w*l.out_h;
int size = l.size*l.size*l.c*l.n*locations;
fwrite(l.biases, sizeof(float), l.outputs, fp);
- fwrite(l.filters, sizeof(float), size, fp);
+ fwrite(l.weights, sizeof(float), size, fp);
}
}
fclose(fp);
@@ -943,8 +1019,8 @@
if(transpose){
transpose_matrix(l.weights, l.inputs, l.outputs);
}
- //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs));
- //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs));
+ //printf("Biases: %f mean %f variance\n", mean_array(l.biases, l.outputs), variance_array(l.biases, l.outputs));
+ //printf("Weights: %f mean %f variance\n", mean_array(l.weights, l.outputs*l.inputs), variance_array(l.weights, l.outputs*l.inputs));
if (l.batch_normalize && (!l.dontloadscales)){
fread(l.scales, sizeof(float), l.outputs, fp);
fread(l.rolling_mean, sizeof(float), l.outputs, fp);
@@ -960,6 +1036,18 @@
#endif
}
+void load_batchnorm_weights(layer l, FILE *fp)
+{
+ fread(l.scales, sizeof(float), l.c, fp);
+ fread(l.rolling_mean, sizeof(float), l.c, fp);
+ fread(l.rolling_variance, sizeof(float), l.c, fp);
+#ifdef GPU
+ if(gpu_index >= 0){
+ push_batchnorm_layer(l);
+ }
+#endif
+}
+
void load_convolutional_weights_binary(layer l, FILE *fp)
{
fread(l.biases, sizeof(float), l.n, fp);
@@ -979,11 +1067,10 @@
fread(&c, sizeof(char), 1, fp);
for(k = 0; k < 8; ++k){
if (j*8 + k >= size) break;
- l.filters[index + k] = (c & 1<<k) ? mean : -mean;
+ l.weights[index + k] = (c & 1<<k) ? mean : -mean;
}
}
}
- binarize_filters2(l.filters, l.n, l.c*l.size*l.size, l.cfilters, l.scales);
#ifdef GPU
if(gpu_index >= 0){
push_convolutional_layer(l);
@@ -1003,12 +1090,32 @@
fread(l.scales, sizeof(float), l.n, fp);
fread(l.rolling_mean, sizeof(float), l.n, fp);
fread(l.rolling_variance, sizeof(float), l.n, fp);
+ if(0){
+ int i;
+ for(i = 0; i < l.n; ++i){
+ printf("%g, ", l.rolling_mean[i]);
+ }
+ printf("\n");
+ for(i = 0; i < l.n; ++i){
+ printf("%g, ", l.rolling_variance[i]);
+ }
+ printf("\n");
+ }
+ if(0){
+ fill_cpu(l.n, 0, l.rolling_mean, 1);
+ fill_cpu(l.n, 0, l.rolling_variance, 1);
+ }
}
- fread(l.filters, sizeof(float), num, fp);
+ fread(l.weights, sizeof(float), num, fp);
+ if(l.adam){
+ fread(l.m, sizeof(float), num, fp);
+ fread(l.v, sizeof(float), num, fp);
+ }
+ //if(l.c == 3) scal_cpu(num, 1./256, l.weights, 1);
if (l.flipped) {
- transpose_matrix(l.filters, l.c*l.size*l.size, l.n);
+ transpose_matrix(l.weights, l.c*l.size*l.size, l.n);
}
- if (l.binary) binarize_filters(l.filters, l.n, l.c*l.size*l.size, l.filters);
+ //if (l.binary) binarize_weights(l.weights, l.n, l.c*l.size*l.size, l.weights);
#ifdef GPU
if(gpu_index >= 0){
push_convolutional_layer(l);
@@ -1019,6 +1126,11 @@
void load_weights_upto(network *net, char *filename, int cutoff)
{
+#ifdef GPU
+ if(net->gpu_index >= 0){
+ cuda_set_device(net->gpu_index);
+ }
+#endif
fprintf(stderr, "Loading weights from %s...", filename);
fflush(stdout);
FILE *fp = fopen(filename, "rb");
@@ -1030,7 +1142,16 @@
fread(&major, sizeof(int), 1, fp);
fread(&minor, sizeof(int), 1, fp);
fread(&revision, sizeof(int), 1, fp);
- fread(net->seen, sizeof(int), 1, fp);
+ if ((major * 10 + minor) >= 2) {
+ printf("\n seen 64 \n");
+ uint64_t iseen = 0;
+ fread(&iseen, sizeof(uint64_t), 1, fp);
+ *net->seen = iseen;
+ }
+ else {
+ printf("\n seen 32 \n");
+ fread(net->seen, sizeof(int), 1, fp);
+ }
int transpose = (major > 1000) || (minor > 1000);
int i;
@@ -1040,19 +1161,12 @@
if(l.type == CONVOLUTIONAL){
load_convolutional_weights(l, fp);
}
- if(l.type == DECONVOLUTIONAL){
- int num = l.n*l.c*l.size*l.size;
- fread(l.biases, sizeof(float), l.n, fp);
- fread(l.filters, sizeof(float), num, fp);
-#ifdef GPU
- if(gpu_index >= 0){
- push_deconvolutional_layer(l);
- }
-#endif
- }
if(l.type == CONNECTED){
load_connected_weights(l, fp, transpose);
}
+ if(l.type == BATCHNORM){
+ load_batchnorm_weights(l, fp);
+ }
if(l.type == CRNN){
load_convolutional_weights(*(l.input_layer), fp);
load_convolutional_weights(*(l.self_layer), fp);
@@ -1075,7 +1189,7 @@
int locations = l.out_w*l.out_h;
int size = l.size*l.size*l.c*l.n*locations;
fread(l.biases, sizeof(float), l.outputs, fp);
- fread(l.filters, sizeof(float), size, fp);
+ fread(l.weights, sizeof(float), size, fp);
#ifdef GPU
if(gpu_index >= 0){
push_local_layer(l);
--
Gitblit v1.10.0