| | |
| | | |
| | | CC=gcc |
| | | NVCC=nvcc |
| | | OPTS=-O3 |
| | | OPTS=-O0 |
| | | LDFLAGS=`pkg-config --libs opencv` -lm -pthread -lstdc++ |
| | | COMMON=`pkg-config --cflags opencv` -I/usr/local/cuda/include/ |
| | | CFLAGS=-Wall -Wfatal-errors |
| | |
| | | ifeq ($(GPU), 1) |
| | | COMMON+=-DGPU |
| | | CFLAGS+=-DGPU |
| | | LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas |
| | | LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand |
| | | endif |
| | | |
| | | OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o normalization_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o detection.o |
| | |
| | | __device__ float relu_activate_kernel(float x){return x*(x>0);} |
| | | __device__ float ramp_activate_kernel(float x){return x*(x>0)+.1*x;} |
| | | __device__ float tanh_activate_kernel(float x){return (exp(2*x)-1)/(exp(2*x)+1);} |
| | | __device__ float plse_activate_kernel(float x) |
| | | { |
| | | if(x < -4) return .01 * (x + 4); |
| | | if(x > 4) return .01 * (x - 4) + 1; |
| | | return .125*x + .5; |
| | | } |
| | | |
| | | __device__ float linear_gradient_kernel(float x){return 1;} |
| | | __device__ float logistic_gradient_kernel(float x){return (1-x)*x;} |
| | | __device__ float relu_gradient_kernel(float x){return (x>0);} |
| | | __device__ float ramp_gradient_kernel(float x){return (x>0)+.1;} |
| | | __device__ float tanh_gradient_kernel(float x){return 1-x*x;} |
| | | __device__ float plse_gradient_kernel(float x){return (x < 0 || x > 1) ? .01 : .125;} |
| | | |
| | | __device__ float activate_kernel(float x, ACTIVATION a) |
| | | { |
| | |
| | | return ramp_activate_kernel(x); |
| | | case TANH: |
| | | return tanh_activate_kernel(x); |
| | | case PLSE: |
| | | return plse_activate_kernel(x); |
| | | } |
| | | return 0; |
| | | } |
| | |
| | | return ramp_gradient_kernel(x); |
| | | case TANH: |
| | | return tanh_gradient_kernel(x); |
| | | case PLSE: |
| | | return plse_gradient_kernel(x); |
| | | } |
| | | return 0; |
| | | } |
| | |
| | | return "linear"; |
| | | case TANH: |
| | | return "tanh"; |
| | | case PLSE: |
| | | return "plse"; |
| | | default: |
| | | break; |
| | | } |
| | |
| | | { |
| | | if (strcmp(s, "logistic")==0) return LOGISTIC; |
| | | if (strcmp(s, "relu")==0) return RELU; |
| | | if (strcmp(s, "plse")==0) return PLSE; |
| | | if (strcmp(s, "linear")==0) return LINEAR; |
| | | if (strcmp(s, "ramp")==0) return RAMP; |
| | | if (strcmp(s, "tanh")==0) return TANH; |
| | |
| | | return ramp_activate(x); |
| | | case TANH: |
| | | return tanh_activate(x); |
| | | case PLSE: |
| | | return plse_activate(x); |
| | | } |
| | | return 0; |
| | | } |
| | |
| | | return ramp_gradient(x); |
| | | case TANH: |
| | | return tanh_gradient(x); |
| | | case PLSE: |
| | | return plse_gradient(x); |
| | | } |
| | | return 0; |
| | | } |
| | |
| | | #define ACTIVATIONS_H |
| | | |
| | | typedef enum{ |
| | | LOGISTIC, RELU, LINEAR, RAMP, TANH |
| | | LOGISTIC, RELU, LINEAR, RAMP, TANH, PLSE |
| | | }ACTIVATION; |
| | | |
| | | ACTIVATION get_activation(char *s); |
| | |
| | | static inline float relu_activate(float x){return x*(x>0);} |
| | | static inline float ramp_activate(float x){return x*(x>0)+.1*x;} |
| | | static inline float tanh_activate(float x){return (exp(2*x)-1)/(exp(2*x)+1);} |
| | | static inline float plse_activate(float x) |
| | | { |
| | | if(x < -4) return .01 * (x + 4); |
| | | if(x > 4) return .01 * (x - 4) + 1; |
| | | return .125*x + .5; |
| | | } |
| | | |
| | | static inline float linear_gradient(float x){return 1;} |
| | | static inline float logistic_gradient(float x){return (1-x)*x;} |
| | | static inline float relu_gradient(float x){return (x>0);} |
| | | static inline float ramp_gradient(float x){return (x>0)+.1;} |
| | | static inline float tanh_gradient(float x){return 1-x*x;} |
| | | static inline float plse_gradient(float x){return (x < 0 || x > 1) ? .01 : .125;} |
| | | |
| | | #endif |
| | | |
| | |
| | | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); |
| | | int imgs = 1024; |
| | | int i = net.seen/imgs; |
| | | list *plist = get_paths("/data/captcha/train.base"); |
| | | list *plist = get_paths("/data/captcha/train.auto5"); |
| | | char **paths = (char **)list_to_array(plist); |
| | | printf("%d\n", plist->size); |
| | | clock_t time; |
| | |
| | | avg_loss = avg_loss*.9 + loss*.1; |
| | | printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), net.seen); |
| | | free_data(train); |
| | | if(i%100==0){ |
| | | if(i%10==0){ |
| | | char buff[256]; |
| | | sprintf(buff, "/home/pjreddie/imagenet_backup/%s_%d.weights",base, i); |
| | | save_weights(net, buff); |
| | |
| | | printf("Enter filename: "); |
| | | fgets(filename, 256, stdin); |
| | | strtok(filename, "\n"); |
| | | image im = load_image_color(filename, 60, 200); |
| | | image im = load_image_color(filename, 57, 300); |
| | | scale_image(im, 1./255.); |
| | | float *X = im.data; |
| | | float *predictions = network_predict(net, X); |
| | | image out = float_to_image(60, 200, 3, predictions); |
| | | image out = float_to_image(57, 300, 1, predictions); |
| | | show_image(out, "decoded"); |
| | | cvWaitKey(0); |
| | | free_image(im); |
| | |
| | | while(1){ |
| | | ++i; |
| | | time=clock(); |
| | | data train = load_data_captcha_encode(paths, imgs, plist->size, 60, 200); |
| | | data train = load_data_captcha_encode(paths, imgs, plist->size, 57, 300); |
| | | scale_data_rows(train, 1./255); |
| | | printf("Loaded: %lf seconds\n", sec(clock()-time)); |
| | | time=clock(); |
| | |
| | | if(weightfile){ |
| | | load_weights(&net, weightfile); |
| | | } |
| | | int imgs = 1000; |
| | | int numchars = 37; |
| | | list *plist = get_paths("/data/captcha/valid.base"); |
| | | list *plist = get_paths("/data/captcha/solved.hard"); |
| | | char **paths = (char **)list_to_array(plist); |
| | | int imgs = plist->size; |
| | | data valid = load_data_captcha(paths, imgs, 0, 10, 60, 200); |
| | | translate_data_rows(valid, -128); |
| | | scale_data_rows(valid, 1./128); |
| | |
| | | |
| | | extern "C" void forward_convolutional_layer_gpu(convolutional_layer layer, network_state state) |
| | | { |
| | | clock_t time = clock(); |
| | | int i; |
| | | int m = layer.n; |
| | | int k = layer.size*layer.size*layer.c; |
| | |
| | | convolutional_out_width(layer); |
| | | |
| | | bias_output_gpu(layer.output_gpu, layer.biases_gpu, layer.batch, layer.n, n); |
| | | cudaDeviceSynchronize(); |
| | | printf("bias %f\n", sec(clock() - time)); |
| | | time = clock(); |
| | | |
| | | float imt=0; |
| | | float gemt = 0; |
| | | for(i = 0; i < layer.batch; ++i){ |
| | | time = clock(); |
| | | im2col_ongpu(state.input + i*layer.c*layer.h*layer.w, layer.c, layer.h, layer.w, layer.size, layer.stride, layer.pad, layer.col_image_gpu); |
| | | cudaDeviceSynchronize(); |
| | | imt += sec(clock()-time); |
| | | time = clock(); |
| | | float * a = layer.filters_gpu; |
| | | float * b = layer.col_image_gpu; |
| | | float * c = layer.output_gpu; |
| | | gemm_ongpu(0,0,m,n,k,1.,a,k,b,n,1.,c+i*m*n,n); |
| | | cudaDeviceSynchronize(); |
| | | gemt += sec(clock()-time); |
| | | time = clock(); |
| | | } |
| | | activate_array_ongpu(layer.output_gpu, m*n*layer.batch, layer.activation); |
| | | cudaDeviceSynchronize(); |
| | | printf("activate %f\n", sec(clock() - time)); |
| | | printf("im2col %f\n", imt); |
| | | printf("gemm %f\n", gemt); |
| | | } |
| | | |
| | | extern "C" void backward_convolutional_layer_gpu(convolutional_layer layer, network_state state) |
| | |
| | | return x_gpu; |
| | | } |
| | | |
| | | void cuda_random(float *x_gpu, int n) |
| | | { |
| | | static curandGenerator_t gen; |
| | | static int init = 0; |
| | | if(!init){ |
| | | curandCreateGenerator(&gen, CURAND_RNG_PSEUDO_DEFAULT); |
| | | curandSetPseudoRandomGeneratorSeed(gen, 0ULL); |
| | | } |
| | | curandGenerateUniform(gen, x_gpu, n); |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | | |
| | | float cuda_compare(float *x_gpu, float *x, int n, char *s) |
| | | { |
| | | float *tmp = calloc(n, sizeof(float)); |
| | |
| | | #define BLOCK 256 |
| | | |
| | | #include "cuda_runtime.h" |
| | | #include "curand.h" |
| | | #include "cublas_v2.h" |
| | | |
| | | void check_error(cudaError_t status); |
| | |
| | | void cuda_push_array(float *x_gpu, float *x, int n); |
| | | void cuda_pull_array(float *x_gpu, float *x, int n); |
| | | void cuda_free(float *x_gpu); |
| | | void cuda_random(float *x_gpu, int n); |
| | | float cuda_compare(float *x_gpu, float *x, int n, char *s); |
| | | dim3 cuda_gridsize(size_t n); |
| | | |
| | |
| | | randomize_boxes(boxes, count); |
| | | float x, y, h, w; |
| | | int id; |
| | | int i, j; |
| | | int i; |
| | | if(background){ |
| | | for(i = 0; i < num_height*num_width*(4+classes+background); i += 4+classes+background){ |
| | | truth[i] = 1; |
| | | } |
| | | } |
| | | for(i = 0; i < count; ++i){ |
| | | x = boxes[i].x; |
| | | y = boxes[i].y; |
| | |
| | | |
| | | int index = (i+j*num_width)*(4+classes+background); |
| | | if(truth[index+classes+background]) continue; |
| | | if(background) truth[index++] = 0; |
| | | truth[index+id] = 1; |
| | | index += classes+background; |
| | | index += classes; |
| | | truth[index++] = dh; |
| | | truth[index++] = dw; |
| | | truth[index++] = h*(height+jitter)/height; |
| | | truth[index++] = w*(width+jitter)/width; |
| | | } |
| | | free(boxes); |
| | | if(background){ |
| | | for(i = 0; i < num_height*num_width*(4+classes+background); i += 4+classes+background){ |
| | | int object = 0; |
| | | for(j = i; j < i+classes; ++j) if (truth[j]) object = 1; |
| | | truth[i+classes] = !object; |
| | | } |
| | | } |
| | | } |
| | | |
| | | #define NUMCHARS 37 |
| | |
| | | data d; |
| | | d.shallow = 0; |
| | | d.X = load_image_paths(paths, n, h, w); |
| | | d.X.cols = 17100; |
| | | d.y = d.X; |
| | | if(m) free(paths); |
| | | return d; |
| | |
| | | char **paths = (char **)list_to_array(plist); |
| | | int im_size = 448; |
| | | int classes = 20; |
| | | int background = 0; |
| | | int background = 1; |
| | | int num_output = 7*7*(4+classes+background); |
| | | |
| | | int m = plist->size; |
| | |
| | | float x = (c + pred.vals[j][ci + 1])/7.; |
| | | float h = pred.vals[j][ci + 2]; |
| | | float w = pred.vals[j][ci + 3]; |
| | | printf("%d %d %f %f %f %f %f\n", (i-1)*m/splits + j, class, pred.vals[j][k+class], y, x, h, w); |
| | | printf("%d %d %f %f %f %f %f\n", (i-1)*m/splits + j, class, pred.vals[j][k+class+background], y, x, h, w); |
| | | } |
| | | } |
| | | } |
| | |
| | | |
| | | int get_detection_layer_locations(detection_layer layer) |
| | | { |
| | | return layer.inputs / (layer.classes+layer.coords+layer.rescore); |
| | | return layer.inputs / (layer.classes+layer.coords+layer.rescore+layer.background); |
| | | } |
| | | |
| | | int get_detection_layer_output_size(detection_layer layer) |
| | | { |
| | | return get_detection_layer_locations(layer)*(layer.classes+layer.coords); |
| | | return get_detection_layer_locations(layer)*(layer.background + layer.classes + layer.coords); |
| | | } |
| | | |
| | | detection_layer *make_detection_layer(int batch, int inputs, int classes, int coords, int rescore) |
| | | detection_layer *make_detection_layer(int batch, int inputs, int classes, int coords, int rescore, int background) |
| | | { |
| | | detection_layer *layer = calloc(1, sizeof(detection_layer)); |
| | | |
| | | |
| | | layer->batch = batch; |
| | | layer->inputs = inputs; |
| | | layer->classes = classes; |
| | | layer->coords = coords; |
| | | layer->rescore = rescore; |
| | | layer->background = background; |
| | | int outputs = get_detection_layer_output_size(*layer); |
| | | layer->output = calloc(batch*outputs, sizeof(float)); |
| | | layer->delta = calloc(batch*outputs, sizeof(float)); |
| | |
| | | return layer; |
| | | } |
| | | |
| | | |
| | | void forward_detection_layer(const detection_layer layer, network_state state) |
| | | void dark_zone(detection_layer layer, int class, int start, network_state state) |
| | | { |
| | | int in_i = 0; |
| | | int out_i = 0; |
| | | int locations = get_detection_layer_locations(layer); |
| | | int i,j; |
| | | for(i = 0; i < layer.batch*locations; ++i){ |
| | | int mask = (!state.truth || state.truth[out_i + layer.classes + 2]); |
| | | float scale = 1; |
| | | if(layer.rescore) scale = state.input[in_i++]; |
| | | for(j = 0; j < layer.classes; ++j){ |
| | | layer.output[out_i++] = scale*state.input[in_i++]; |
| | | } |
| | | if(!layer.rescore){ |
| | | softmax_array(layer.output + out_i - layer.classes, layer.classes, layer.output + out_i - layer.classes); |
| | | activate_array(state.input+in_i, layer.coords, LOGISTIC); |
| | | } |
| | | for(j = 0; j < layer.coords; ++j){ |
| | | layer.output[out_i++] = mask*state.input[in_i++]; |
| | | } |
| | | } |
| | | } |
| | | |
| | | void dark_zone(detection_layer layer, int index, network_state state) |
| | | { |
| | | int size = layer.classes+layer.rescore+layer.coords; |
| | | int index = start+layer.background+class; |
| | | int size = layer.classes+layer.coords+layer.background; |
| | | int location = (index%(7*7*size)) / size ; |
| | | int r = location / 7; |
| | | int c = location % 7; |
| | | int class = index%size; |
| | | if(layer.rescore) --class; |
| | | int dr, dc; |
| | | for(dr = -1; dr <= 1; ++dr){ |
| | | for(dc = -1; dc <= 1; ++dc){ |
| | |
| | | if((c + dc) > 6 || (c + dc) < 0) continue; |
| | | int di = (dr*7 + dc) * size; |
| | | if(state.truth[index+di]) continue; |
| | | layer.delta[index + di] = 0; |
| | | layer.output[index + di] = 0; |
| | | //if(!state.truth[start+di]) continue; |
| | | //layer.output[start + di] = 1; |
| | | } |
| | | } |
| | | } |
| | | |
| | | void forward_detection_layer(const detection_layer layer, network_state state) |
| | | { |
| | | int in_i = 0; |
| | | int out_i = 0; |
| | | int locations = get_detection_layer_locations(layer); |
| | | int i,j; |
| | | for(i = 0; i < layer.batch*locations; ++i){ |
| | | int mask = (!state.truth || state.truth[out_i + layer.background + layer.classes + 2]); |
| | | float scale = 1; |
| | | if(layer.rescore) scale = state.input[in_i++]; |
| | | if(layer.background) layer.output[out_i++] = scale*state.input[in_i++]; |
| | | |
| | | for(j = 0; j < layer.classes; ++j){ |
| | | layer.output[out_i++] = scale*state.input[in_i++]; |
| | | } |
| | | if(layer.background){ |
| | | softmax_array(layer.output + out_i - layer.classes-layer.background, layer.classes+layer.background, layer.output + out_i - layer.classes-layer.background); |
| | | activate_array(state.input+in_i, layer.coords, LOGISTIC); |
| | | } |
| | | for(j = 0; j < layer.coords; ++j){ |
| | | layer.output[out_i++] = mask*state.input[in_i++]; |
| | | } |
| | | } |
| | | if(layer.background || 1){ |
| | | for(i = 0; i < layer.batch*locations; ++i){ |
| | | int index = i*(layer.classes+layer.coords+layer.background); |
| | | for(j= 0; j < layer.classes; ++j){ |
| | | if(state.truth[index+j+layer.background]){ |
| | | //dark_zone(layer, j, index, state); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | } |
| | |
| | | float scale = 1; |
| | | float latent_delta = 0; |
| | | if(layer.rescore) scale = state.input[in_i++]; |
| | | if(!layer.rescore){ |
| | | for(j = 0; j < layer.classes-1; ++j){ |
| | | if(state.truth[out_i + j]) dark_zone(layer, out_i+j, state); |
| | | } |
| | | } |
| | | if(layer.background) state.delta[in_i++] = scale*layer.delta[out_i++]; |
| | | for(j = 0; j < layer.classes; ++j){ |
| | | latent_delta += state.input[in_i]*layer.delta[out_i]; |
| | | state.delta[in_i++] = scale*layer.delta[out_i++]; |
| | | } |
| | | |
| | | if (!layer.rescore) gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i); |
| | | if (layer.background) gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i); |
| | | for(j = 0; j < layer.coords; ++j){ |
| | | state.delta[in_i++] = layer.delta[out_i++]; |
| | | } |
| | | if(layer.rescore) state.delta[in_i-layer.coords-layer.classes-layer.rescore] = latent_delta; |
| | | if(layer.rescore) state.delta[in_i-layer.coords-layer.classes-layer.rescore-layer.background] = latent_delta; |
| | | } |
| | | } |
| | | |
| | |
| | | int inputs; |
| | | int classes; |
| | | int coords; |
| | | int background; |
| | | int rescore; |
| | | float *output; |
| | | float *delta; |
| | |
| | | #endif |
| | | } detection_layer; |
| | | |
| | | detection_layer *make_detection_layer(int batch, int inputs, int classes, int coords, int rescore); |
| | | detection_layer *make_detection_layer(int batch, int inputs, int classes, int coords, int rescore, int background); |
| | | void forward_detection_layer(const detection_layer layer, network_state state); |
| | | void backward_detection_layer(const detection_layer layer, network_state state); |
| | | int get_detection_layer_output_size(detection_layer layer); |
| | |
| | | extern "C" void forward_dropout_layer_gpu(dropout_layer layer, network_state state) |
| | | { |
| | | if (!state.train) return; |
| | | int j; |
| | | int size = layer.inputs*layer.batch; |
| | | for(j = 0; j < size; ++j) layer.rand[j] = rand_uniform(); |
| | | cuda_push_array(layer.rand_gpu, layer.rand, layer.inputs*layer.batch); |
| | | cuda_random(layer.rand_gpu, size); |
| | | |
| | | yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.rand_gpu, layer.probability, layer.scale); |
| | | check_error(cudaPeekAtLastError()); |
| | |
| | | load_weights(&net, weightfile); |
| | | } |
| | | printf("Learning Rate: %g, Momentum: %g, Decay: %g\n", net.learning_rate, net.momentum, net.decay); |
| | | int imgs = 1024; |
| | | int imgs = 128; |
| | | int i = net.seen/imgs; |
| | | char **labels = get_labels("/home/pjreddie/data/imagenet/cls.labels.list"); |
| | | list *plist = get_paths("/data/imagenet/cls.train.list"); |
| | |
| | | { |
| | | int i; |
| | | for(i = 0; i < net.n; ++i){ |
| | | //clock_t time = clock(); |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | forward_convolutional_layer_gpu(*(convolutional_layer *)net.layers[i], state); |
| | | } |
| | |
| | | forward_crop_layer_gpu(*(crop_layer *)net.layers[i], state); |
| | | } |
| | | state.input = get_network_output_gpu_layer(net, i); |
| | | //cudaDeviceSynchronize(); |
| | | //printf("forw %d: %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time)); |
| | | //time = clock(); |
| | | } |
| | | } |
| | | |
| | |
| | | int i; |
| | | float * original_input = state.input; |
| | | for(i = net.n-1; i >= 0; --i){ |
| | | //clock_t time = clock(); |
| | | //clock_t time = clock(); |
| | | if(i == 0){ |
| | | state.input = original_input; |
| | | state.delta = 0; |
| | |
| | | else if(net.types[i] == SOFTMAX){ |
| | | backward_softmax_layer_gpu(*(softmax_layer *)net.layers[i], state); |
| | | } |
| | | //cudaDeviceSynchronize(); |
| | | //printf("back %d: %s %f\n", i, get_layer_string(net.types[i]), sec(clock() - time)); |
| | | //time = clock(); |
| | | } |
| | | } |
| | | |
| | |
| | | |
| | | float train_network_datum_gpu(network net, float *x, float *y) |
| | | { |
| | | //clock_t time = clock(); |
| | | // clock_t time = clock(); |
| | | network_state state; |
| | | int x_size = get_network_input_size(net)*net.batch; |
| | | int y_size = get_network_output_size(net)*net.batch; |
| | |
| | | state.input = *net.input_gpu; |
| | | state.truth = *net.truth_gpu; |
| | | state.train = 1; |
| | | //printf("trans %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | //cudaDeviceSynchronize(); |
| | | //printf("trans %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | forward_network_gpu(net, state); |
| | | //printf("forw %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | //cudaDeviceSynchronize(); |
| | | //printf("forw %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | backward_network_gpu(net, state); |
| | | //printf("back %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | //cudaDeviceSynchronize(); |
| | | //printf("back %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | update_network_gpu(net); |
| | | float error = get_network_cost(net); |
| | | |
| | | //print_letters(y, 50); |
| | | //float *out = get_network_output_gpu(net); |
| | | //print_letters(out, 50); |
| | | //printf("updt %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | //cudaDeviceSynchronize(); |
| | | //printf("updt %f\n", sec(clock() - time)); |
| | | //time = clock(); |
| | | return error; |
| | | } |
| | | |
| | |
| | | |
| | | float *network_predict_gpu(network net, float *input) |
| | | { |
| | | |
| | | int size = get_network_input_size(net) * net.batch; |
| | | network_state state; |
| | | state.input = cuda_make_array(input, size); |
| | |
| | | int coords = option_find_int(options, "coords", 1); |
| | | int classes = option_find_int(options, "classes", 1); |
| | | int rescore = option_find_int(options, "rescore", 1); |
| | | detection_layer *layer = make_detection_layer(params.batch, params.inputs, classes, coords, rescore); |
| | | int background = option_find_int(options, "background", 1); |
| | | detection_layer *layer = make_detection_layer(params.batch, params.inputs, classes, coords, rescore, background); |
| | | option_unused(options); |
| | | return layer; |
| | | } |