-GPU=0
-OPENCV=0
+GPU=1
+OPENCV=1
 DEBUG=0

 ARCH= --gpu-architecture=compute_20 --gpu-code=compute_20

 [net]
 subdivisions=1
 inputs=256
-batch = 1
-time_steps = 1
+batch = 128
 momentum=0.9
-decay=0.0005
-max_batches = 50000000
+decay=0.001
+max_batches = 50000
+time_steps=900
 learning_rate=0.1

 [rnn]
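The [rnn] section's keys are not shown in this hunk. A sketch of what it plausibly contains, inferred from parse_rnn and make_rnn_layer later in this diff (hidden=1024 is purely illustrative; output should be 256 to match the one-hot byte encoding used by the char-rnn code):

    [rnn]
    batch_normalize=1
    output = 256
    hidden = 1024
    activation=leaky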

 __device__ float linear_activate_kernel(float x){return x;}
 __device__ float logistic_activate_kernel(float x){return 1./(1. + exp(-x));}
+__device__ float loggy_activate_kernel(float x){return 2./(1. + exp(-x)) - 1;}
 __device__ float relu_activate_kernel(float x){return x*(x>0);}
 __device__ float elu_activate_kernel(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
 __device__ float relie_activate_kernel(float x){return x*(x>0);}
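The new LOGGY activation is a rescaled logistic, 2*sigma(x) - 1, which is algebraically the same curve as tanh(x/2). A standalone check of that identity (a sketch, not part of the diff; compile with gcc and link -lm):

    #include <stdio.h>
    #include <math.h>

    static float loggy(float x){ return 2./(1. + exp(-x)) - 1; }

    int main()
    {
        float x;
        /* loggy(x) and tanh(x/2) should agree to float precision */
        for(x = -4; x <= 4; x += 1){
            printf("x=%5.1f  loggy=% .6f  tanh(x/2)=% .6f\n", x, loggy(x), tanh(x/2));
        }
        return 0;
    }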

 __device__ float linear_gradient_kernel(float x){return 1;}
 __device__ float logistic_gradient_kernel(float x){return (1-x)*x;}
+__device__ float loggy_gradient_kernel(float x)
+{
+    float y = (x+1.)/2.;
+    return 2*(1-y)*y;
+}
 __device__ float relu_gradient_kernel(float x){return (x>0);}
 __device__ float elu_gradient_kernel(float x){return (x >= 0) + (x < 0)*(x + 1);}
 __device__ float relie_gradient_kernel(float x){return (x>0) ? 1 : .01;}
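Note the convention these gradient kernels share: the argument is the activation's output, not its pre-activation input. logistic_gradient computes y*(1-y) from the output y, and the new loggy_gradient first maps its input back to the underlying sigmoid via sigma = (y+1)/2. A finite-difference check of the loggy gradient under that convention (a sketch, not part of the diff):

    #include <stdio.h>
    #include <math.h>

    static float loggy(float x){ return 2./(1. + exp(-x)) - 1; }
    static float loggy_gradient(float y){ float s = (y+1.)/2.; return 2*(1-s)*s; }

    int main()
    {
        float x = 0.7f, eps = 1e-3f;
        /* numerical derivative of loggy at x */
        float numeric  = (loggy(x+eps) - loggy(x-eps)) / (2*eps);
        /* analytic gradient, evaluated at the activation OUTPUT */
        float analytic = loggy_gradient(loggy(x));
        printf("numeric %f  analytic %f\n", numeric, analytic);
        return 0;
    }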

         return linear_activate_kernel(x);
     case LOGISTIC:
         return logistic_activate_kernel(x);
+    case LOGGY:
+        return loggy_activate_kernel(x);
     case RELU:
         return relu_activate_kernel(x);
     case ELU:

         return linear_gradient_kernel(x);
     case LOGISTIC:
         return logistic_gradient_kernel(x);
+    case LOGGY:
+        return loggy_gradient_kernel(x);
     case RELU:
         return relu_gradient_kernel(x);
     case ELU:

     switch(a){
         case LOGISTIC:
             return "logistic";
+        case LOGGY:
+            return "loggy";
         case RELU:
             return "relu";
         case ELU:

 ACTIVATION get_activation(char *s)
 {
     if (strcmp(s, "logistic")==0) return LOGISTIC;
+    if (strcmp(s, "loggy")==0) return LOGGY;
     if (strcmp(s, "relu")==0) return RELU;
     if (strcmp(s, "elu")==0) return ELU;
     if (strcmp(s, "relie")==0) return RELIE;

         return linear_activate(x);
     case LOGISTIC:
         return logistic_activate(x);
+    case LOGGY:
+        return loggy_activate(x);
     case RELU:
         return relu_activate(x);
     case ELU:

         return linear_gradient(x);
     case LOGISTIC:
         return logistic_gradient(x);
+    case LOGGY:
+        return loggy_gradient(x);
     case RELU:
         return relu_gradient(x);
     case ELU:

 #include "math.h"

 typedef enum{
-    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU
+    LOGISTIC, RELU, RELIE, LINEAR, RAMP, TANH, PLSE, LEAKY, ELU, LOGGY
 }ACTIVATION;

 ACTIVATION get_activation(char *s);

 static inline float linear_activate(float x){return x;}
 static inline float logistic_activate(float x){return 1./(1. + exp(-x));}
+static inline float loggy_activate(float x){return 2./(1. + exp(-x)) - 1;}
 static inline float relu_activate(float x){return x*(x>0);}
 static inline float elu_activate(float x){return (x >= 0)*x + (x < 0)*(exp(x)-1);}
 static inline float relie_activate(float x){return x*(x>0);}

 static inline float linear_gradient(float x){return 1;}
 static inline float logistic_gradient(float x){return (1-x)*x;}
+static inline float loggy_gradient(float x)
+{
+    float y = (x+1.)/2.;
+    return 2*(1-y)*y;
+}
 static inline float relu_gradient(float x){return (x>0);}
 static inline float elu_gradient(float x){return (x >= 0) + (x < 0)*(x + 1);}
 static inline float relie_gradient(float x){return (x>0) ? 1 : .01;}

     gpu_index = find_int_arg(argc, argv, "-i", 0);
     if(find_arg(argc, argv, "-nogpu")) {
         gpu_index = -1;
         printf("nogpu\n");
     }

 #ifndef GPU

     ACTIVATION activation;
     COST_TYPE cost_type;
     int batch_normalize;
+    int shortcut;
     int batch;
     int forced;
     int flipped;

     int i;
     float sum = 0;
     for (i = 0; i < n; ++i){
-        sum += abs(x[i]);
+        sum += fabs(x[i]);
     }
     return sum/n;
 }
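The abs -> fabs change is a real fix: abs() takes an int, so a float argument is truncated before the absolute value is taken, and every |x| < 1 contributes zero to the sum. A minimal demonstration (not part of the diff):

    #include <stdio.h>
    #include <stdlib.h>
    #include <math.h>

    int main()
    {
        float x = -0.75f;
        printf("abs  -> %d\n", abs(x));   /* x is converted to int first: prints 0 */
        printf("fabs -> %f\n", fabs(x));  /* prints 0.750000 */
        return 0;
    }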

     char *activation_s = option_find_str(options, "activation", "logistic");
     ACTIVATION activation = get_activation(activation_s);
     int batch_normalize = option_find_int_quiet(options, "batch_normalize", 0);
+    int logistic = option_find_int_quiet(options, "logistic", 0);

-    layer l = make_rnn_layer(params.batch, params.inputs, hidden, output, params.time_steps, activation, batch_normalize);
+    layer l = make_rnn_layer(params.batch, params.inputs, hidden, output, params.time_steps, activation, batch_normalize, logistic);

+    l.shortcut = option_find_int_quiet(options, "shortcut", 0);

     return l;
 }

     int i,j;
     for(i = 0; i < batch; ++i){
         int index = rand() %(len - steps - 1);
         int done = 1;
         while(!done){
             index = rand() %(len - steps - 1);
             while(index < len-steps-1 && text[index++] != '\n');
             if (index < len-steps-1) done = 1;
         }
         for(j = 0; j < steps; ++j){
             x[(j*batch + i)*256 + text[index + j]] = 1;
             y[(j*batch + i)*256 + text[index + j + 1]] = 1;
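Two things worth noting in this hunk. First, because done starts at 1, the newline-seeking while(!done) loop never executes, so every training sequence begins at a random byte offset rather than at a line boundary. Second, the one-hot tensors are laid out time-major: all batch entries for step j are contiguous, so the unrolled network can consume step j of every sequence as one matrix. A small sketch of that indexing (not part of the diff):

    #include <stdio.h>
    #include <string.h>

    int main()
    {
        const char *text = "hello";
        int batch = 1, steps = 4, i = 0, j;
        float x[4*1*256];                /* steps * batch * 256 */
        memset(x, 0, sizeof(x));
        for(j = 0; j < steps; ++j){
            /* step j of sequence i lives at offset (j*batch + i)*256 */
            x[(j*batch + i)*256 + (unsigned char)text[j]] = 1;
        }
        printf("step 0 one-hot for 'h' set at index %d\n", (0*batch + 0)*256 + 'h');
        return 0;
    }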

     srand(time(0));
     data_seed = time(0);
     char *base = basecfg(cfgfile);
-    printf("%s\n", base);
+    fprintf(stderr, "%s\n", base);
     float avg_loss = -1;
     network net = parse_network_cfg(cfgfile);
     if(weightfile){
         load_weights(&net, weightfile);
     }
-    printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
+    fprintf(stderr, "Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay);
     int batch = net.batch;
     int steps = net.time_steps;
     int i = (*net.seen)/net.batch;

         if (avg_loss < 0) avg_loss = loss;
         avg_loss = avg_loss*.9 + loss*.1;

-        printf("%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time));
+        fprintf(stderr, "%d: %f, %f avg, %f rate, %lf seconds\n", i, loss, avg_loss, get_current_rate(net), sec(clock()-time));
         if(i%100==0){
             char buff[256];
             sprintf(buff, "%s/%s_%d.weights", backup_directory, base, i);

 {
     srand(rseed);
     char *base = basecfg(cfgfile);
-    printf("%s\n", base);
+    fprintf(stderr, "%s\n", base);

     network net = parse_network_cfg(cfgfile);
     if(weightfile){

     printf("\n");
 }

+void valid_char_rnn(char *cfgfile, char *weightfile, char *filename)
+{
+    FILE *fp = fopen(filename, "r");
+    //FILE *fp = fopen("data/ab.txt", "r");
+    //FILE *fp = fopen("data/grrm/asoiaf.txt", "r");
+
+    fseek(fp, 0, SEEK_END);
+    size_t size = ftell(fp);
+    fseek(fp, 0, SEEK_SET);
+
+    char *text = calloc(size, sizeof(char));
+    fread(text, 1, size, fp);
+    fclose(fp);
+
+    char *base = basecfg(cfgfile);
+    fprintf(stderr, "%s\n", base);
+
+    network net = parse_network_cfg(cfgfile);
+    if(weightfile){
+        load_weights(&net, weightfile);
+    }
+
+    int i;
+    char c;
+    float *input = calloc(256, sizeof(float));
+    float sum = 0;
+    for(i = 0; i < size-1; ++i){
+        c = text[i];
+        input[(int)c] = 1;
+        float *out = network_predict(net, input);
+        input[(int)c] = 0;
+        sum += log(out[(int)text[i+1]]);
+    }
+    printf("Log Probability: %f\n", sum);
+}
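The summed log probability that valid_char_rnn prints is the log-likelihood of the file under the model; dividing by the number of predictions and exponentiating the negation gives per-character perplexity, which is often easier to compare across files. A sketch of that conversion (the numbers are hypothetical, not from the diff):

    #include <stdio.h>
    #include <math.h>

    int main()
    {
        double sum = -123456.0;   /* hypothetical "Log Probability" output */
        long   n   = 100000;      /* characters scored, i.e. size - 1 */
        /* perplexity = exp(-(1/N) * sum of log p) */
        printf("perplexity: %f\n", exp(-sum / n));
        return 0;
    }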

 void run_char_rnn(int argc, char **argv)
 {
     if(argc < 4){

     char *cfg = argv[3];
     char *weights = (argc > 4) ? argv[4] : 0;
     if(0==strcmp(argv[2], "train")) train_char_rnn(cfg, weights, filename);
+    else if(0==strcmp(argv[2], "valid")) valid_char_rnn(cfg, weights, filename);
     else if(0==strcmp(argv[2], "test")) test_char_rnn(cfg, weights, len, seed, temp, rseed);
 }
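Going by the argv indices in run_char_rnn, invocations would look roughly like the following (paths are illustrative; how filename, len, seed, temp, and rseed are pulled from the remaining arguments is not shown in this hunk):

    ./darknet rnn train cfg/rnn.cfg
    ./darknet rnn valid cfg/rnn.cfg backup/rnn_10000.weights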

 #include <string.h>

-layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize)
+layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log)
 {
-    printf("%d %d\n", batch, steps);
+    fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs);
     batch = batch / steps;
     layer l = {0};
     l.batch = batch;

     l.state = calloc(batch*hidden, sizeof(float));

     l.input_layer = malloc(sizeof(layer));
+    fprintf(stderr, "\t\t");
     *(l.input_layer) = make_connected_layer(batch*steps, inputs, hidden, activation, batch_normalize);
     l.input_layer->batch = batch;

     l.self_layer = malloc(sizeof(layer));
-    *(l.self_layer) = make_connected_layer(batch*steps, hidden, hidden, activation, batch_normalize);
+    fprintf(stderr, "\t\t");
+    *(l.self_layer) = make_connected_layer(batch*steps, hidden, hidden, (log==2)?LOGGY:(log==1?LOGISTIC:activation), batch_normalize);
     l.self_layer->batch = batch;

     l.output_layer = malloc(sizeof(layer));
+    fprintf(stderr, "\t\t");
     *(l.output_layer) = make_connected_layer(batch*steps, hidden, outputs, activation, batch_normalize);
     l.output_layer->batch = batch;

     l.delta_gpu = l.output_layer->delta_gpu;
 #endif

-    fprintf(stderr, "RNN Layer: %d inputs, %d outputs\n", inputs, outputs);
     return l;
 }
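Putting the pieces together: make_rnn_layer divides the incoming batch by steps, so the batch it receives counts total rows (sequences times time steps); the sub-layers are then sized batch*steps, i.e. the original row count, to hold the whole unrolled sequence, while each sub-layer's batch field is reset to the per-step batch. The three connected layers implement an Elman-style recurrence, roughly (a sketch; the exact combination of the input_layer and self_layer outputs happens in forward_rnn_layer, which is not part of this diff):

    h_t = f(W_i * x_t + b_i) + f_s(W_s * h_{t-1} + b_s)
    y_t = f(W_o * h_t + b_o)

where f is the cfg activation and f_s is the same activation unless the new log parameter overrides it (LOGISTIC for log==1, LOGGY for log==2).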

 #include "layer.h"
 #include "network.h"

-layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize);
+layer make_rnn_layer(int batch, int inputs, int hidden, int outputs, int steps, ACTIVATION activation, int batch_normalize, int log);

 void forward_rnn_layer(layer l, network_state state);
 void backward_rnn_layer(layer l, network_state state);