Fixed im2col mistake >< face#palm
| | |
| | | GPU=1 |
| | | DEBUG=0 |
| | | ARCH= -arch=sm_35 |
| | | ARCH= -arch=sm_50 |
| | | |
| | | VPATH=./src/ |
| | | EXEC=darknet |
| | |
| | | } |
| | | } |
| | | |
| | | void col2im_ongpu(float *im, |
| | | void col2im_ongpu(float *data_col, |
| | | int channels, int height, int width, |
| | | int ksize, int stride, int pad, float *data_col){ |
| | | int ksize, int stride, int pad, float *data_im){ |
| | | // We are going to launch channels * height_col * width_col kernels, each |
| | | // kernel responsible for copying a single-channel grid. |
| | | pad = pad ? ksize/2 : 0; |
| | |
| | | BLOCK>>>( |
| | | num_kernels, data_col, height, width, ksize, pad, |
| | | stride, height_col, |
| | | width_col, im); |
| | | width_col, data_im); |
| | | } |
| | | |
| | | /* |
| | |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | | |
| | | __global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size, float scale) |
| | | __global__ void backward_bias_kernel(float *bias_updates, float *delta, int batch, int n, int size) |
| | | { |
| | | __shared__ float part[BLOCK]; |
| | | int i,b; |
| | |
| | | part[p] = sum; |
| | | __syncthreads(); |
| | | if(p == 0){ |
| | | for(i = 0; i < BLOCK; ++i) bias_updates[filter] += scale * part[i]; |
| | | for(i = 0; i < BLOCK; ++i) bias_updates[filter] += part[i]; |
| | | } |
| | | } |
| | | |
| | | void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size) |
| | | { |
| | | backward_bias_kernel<<<n, BLOCK>>>(bias_updates, delta, batch, n, size, 1); |
| | | backward_bias_kernel<<<n, BLOCK>>>(bias_updates, delta, batch, n, size); |
| | | check_error(cudaPeekAtLastError()); |
| | | } |
| | | |
| | |
| | | { |
| | | char *base = basecfg(cfgfile); |
| | | printf("%s\n", base); |
| | | float avg_loss = 1; |
| | | float avg_loss = -1; |
| | | network net = parse_network_cfg(cfgfile); |
| | | if(weightfile){ |
| | | load_weights(&net, weightfile); |
| | |
| | | time=clock(); |
| | | float loss = train_network(net, train); |
| | | net.seen += imgs; |
| | | if (avg_loss < 0) avg_loss = loss; |
| | | avg_loss = avg_loss*.9 + loss*.1; |
| | | printf("%d: %f, %f avg, %lf seconds, %d images\n", i, loss, avg_loss, sec(clock()-time), i*imgs); |
| | | if(i%100==0){ |
| | |
| | | char **paths = (char **)list_to_array(plist); |
| | | int im_size = 448; |
| | | int classes = 20; |
| | | int background = 1; |
| | | int nuisance = 0; |
| | | int background = 0; |
| | | int nuisance = 1; |
| | | int num_output = 7*7*(4+classes+background+nuisance); |
| | | |
| | | int m = plist->size; |
| | |
| | | for(j = 0; j < pred.rows; ++j){ |
| | | for(k = 0; k < pred.cols; k += classes+4+background+nuisance){ |
| | | float scale = 1.; |
| | | if(nuisance) scale = pred.vals[j][k]; |
| | | if(nuisance) scale = 1.-pred.vals[j][k]; |
| | | for(class = 0; class < classes; ++class){ |
| | | int index = (k)/(classes+4+background+nuisance); |
| | | int r = index/7; |
| | |
| | | } |
| | | } |
| | | /* |
| | | int count = 0; |
| | | for(i = 0; i < layer.batch*locations; ++i){ |
| | | for(j = 0; j < layer.classes+layer.background; ++j){ |
| | | printf("%f, ", layer.output[count++]); |
| | | } |
| | | printf("\n"); |
| | | for(j = 0; j < layer.coords; ++j){ |
| | | printf("%f, ", layer.output[count++]); |
| | | } |
| | | printf("\n"); |
| | | } |
| | | */ |
| | | /* |
| | | if(layer.background || 1){ |
| | | for(i = 0; i < layer.batch*locations; ++i){ |
| | | int index = i*(layer.classes+layer.coords+layer.background); |
| | |
| | | state.delta[in_i++] = scale*layer.delta[out_i++]; |
| | | } |
| | | |
| | | if (layer.nuisance) ; |
| | | else if (layer.background) gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i); |
| | | if (layer.nuisance) { |
| | | |
| | | }else if (layer.background) gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i); |
| | | for(j = 0; j < layer.coords; ++j){ |
| | | state.delta[in_i++] = layer.delta[out_i++]; |
| | | } |
| | |
| | | if (!state.train) return; |
| | | int size = layer.inputs*layer.batch; |
| | | cuda_random(layer.rand_gpu, size); |
| | | int i; |
| | | for(i = 0; i < size; ++i){ |
| | | layer.rand[i] = rand_uniform(); |
| | | } |
| | | cuda_push_array(layer.rand_gpu, layer.rand, size); |
| | | |
| | | yoloswag420blazeit360noscope<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.rand_gpu, layer.probability, layer.scale); |
| | | check_error(cudaPeekAtLastError()); |
| | |
| | | state.input = get_network_output_gpu_layer(net, i-1); |
| | | state.delta = get_network_delta_gpu_layer(net, i-1); |
| | | } |
| | | |
| | | if(net.types[i] == CONVOLUTIONAL){ |
| | | backward_convolutional_layer_gpu(*(convolutional_layer *)net.layers[i], state); |
| | | } |