From 1b5afb45838e603fa6780762eb8cc59246dc2d81 Mon Sep 17 00:00:00 2001
From: IlyaOvodov <b@ovdv.ru>
Date: Tue, 08 May 2018 11:09:35 +0000
Subject: [PATCH] Output improvements for detector results: When printing detector results, output was done in random order, making the results hard to interpret. Now: 1. Text output includes the coordinates of rects (left, right, top, bottom in pixels) along with label and score 2. Text output is sorted by rect left edges to simplify finding the corresponding rects on the image 3. If several class probs are > thresh for some detection, the most probable is written first and the coordinates are not repeated for the others 4. Rects are drawn on the image in order of their best class prob, so the most probable rects are always on top and not overlaid by less probable ones 5. The most probable label for a rect is always written first Also: 6. The message about low GPU memory includes the required amount

---
 src/detection_layer.c |  614 ++++++++++++++++++++++++-------------------------------
 1 files changed, 266 insertions(+), 348 deletions(-)

diff --git a/src/detection_layer.c b/src/detection_layer.c
index 7eaabb4..0a1c107 100644
--- a/src/detection_layer.c
+++ b/src/detection_layer.c
@@ -2,396 +2,314 @@
 #include "activations.h"
 #include "softmax_layer.h"
 #include "blas.h"
+#include "box.h"
 #include "cuda.h"
 #include "utils.h"
 #include <stdio.h>
+#include <assert.h>
 #include <string.h>
 #include <stdlib.h>
 
-int get_detection_layer_locations(detection_layer layer)
+detection_layer make_detection_layer(int batch, int inputs, int n, int side, int classes, int coords, int rescore) // Builds a DETECTION layer by value: records the dimensions, allocates the cost/output/delta buffers and installs the forward/backward callbacks.
 {
-    return layer.inputs / (layer.classes+layer.coords+layer.rescore+layer.background);
-}
+    detection_layer l = {0}; // every field not assigned below starts out zero
+    l.type = DETECTION;
 
-int get_detection_layer_output_size(detection_layer layer)
-{
-    return get_detection_layer_locations(layer)*(layer.background + layer.classes + layer.coords);
-}
+    l.n = n;
+    l.batch = batch;
+    l.inputs = inputs;
+    l.classes = classes;
+    l.coords = coords;
+    l.rescore = rescore;
+    l.side = side;
+    l.w = side; // w and h both mirror side
+    l.h = side;
+    assert(side*side*((1 + l.coords)*l.n + l.classes) == inputs); // inputs must equal side*side cells of n*(1+coords) predictor values plus `classes` scores
+    l.cost = calloc(1, sizeof(float));
+    l.outputs = l.inputs; // the layer emits as many values as it receives
+    l.truths = l.side*l.side*(1+l.coords+l.classes); // truth values per image: side*side cells of (1 + coords + classes)
+    l.output = calloc(batch*l.outputs, sizeof(float)); // zero-filled host buffers, batch*outputs floats each
+    l.delta = calloc(batch*l.outputs, sizeof(float));
 
-detection_layer *make_detection_layer(int batch, int inputs, int classes, int coords, int rescore, int background, int nuisance)
-{
-    detection_layer *layer = calloc(1, sizeof(detection_layer));
-    
-    layer->batch = batch;
-    layer->inputs = inputs;
-    layer->classes = classes;
-    layer->coords = coords;
-    layer->rescore = rescore;
-    layer->nuisance = nuisance;
-    layer->cost = calloc(1, sizeof(float));
-    layer->does_cost=1;
-    layer->background = background;
-    int outputs = get_detection_layer_output_size(*layer);
-    layer->output = calloc(batch*outputs, sizeof(float));
-    layer->delta = calloc(batch*outputs, sizeof(float));
-    #ifdef GPU
-    layer->output_gpu = cuda_make_array(0, batch*outputs);
-    layer->delta_gpu = cuda_make_array(0, batch*outputs);
-    #endif
+    l.forward = forward_detection_layer;
+    l.backward = backward_detection_layer;
+#ifdef GPU
+    l.forward_gpu = forward_detection_layer_gpu;
+    l.backward_gpu = backward_detection_layer_gpu;
+    l.output_gpu = cuda_make_array(l.output, batch*l.outputs); // host buffer and element count are handed to cuda_make_array
+    l.delta_gpu = cuda_make_array(l.delta, batch*l.outputs);
+#endif
 
     fprintf(stderr, "Detection Layer\n");
     srand(0);
 
-    return layer;
+    return l; // returned by value; the heap buffers above stay referenced by the copy
 }
 
-void dark_zone(detection_layer layer, int class, int start, network_state state)
+void forward_detection_layer(const detection_layer l, network_state state) // Copies state.input into l.output, optionally softmaxes the class scores, and in training fills l.delta and *l.cost from state.truth.
 {
-    int index = start+layer.background+class;
-    int size = layer.classes+layer.coords+layer.background;
-    int location = (index%(7*7*size)) / size ;
-    int r = location / 7;
-    int c = location % 7;
-    int dr, dc;
-    for(dr = -1; dr <= 1; ++dr){
-        for(dc = -1; dc <= 1; ++dc){
-            if(!(dr || dc)) continue;
-            if((r + dr) > 6 || (r + dr) < 0) continue;
-            if((c + dc) > 6 || (c + dc) < 0) continue;
-            int di = (dr*7 + dc) * size;
-            if(state.truth[index+di]) continue;
-            layer.output[index + di] = 0;
-            //if(!state.truth[start+di]) continue;
-            //layer.output[start + di] = 1;
-        }
-    }
-}
-
-typedef struct{
-    float dx, dy, dw, dh;
-} dbox;
-
-dbox derivative(box a, box b)
-{
-    dbox d;
-    d.dx = 0;
-    d.dw = 0;
-    float l1 = a.x - a.w/2;
-    float l2 = b.x - b.w/2;
-    if (l1 > l2){
-        d.dx -= 1;
-        d.dw += .5;
-    }
-    float r1 = a.x + a.w/2;
-    float r2 = b.x + b.w/2;
-    if(r1 < r2){
-        d.dx += 1;
-        d.dw += .5;
-    }
-    if (l1 > r2) {
-        d.dx = -1;
-        d.dw = 0;
-    }
-    if (r1 < l2){
-        d.dx = 1;
-        d.dw = 0;
-    }
-
-    d.dy = 0;
-    d.dh = 0;
-    float t1 = a.y - a.h/2;
-    float t2 = b.y - b.h/2;
-    if (t1 > t2){
-        d.dy -= 1;
-        d.dh += .5;
-    }
-    float b1 = a.y + a.h/2;
-    float b2 = b.y + b.h/2;
-    if(b1 < b2){
-        d.dy += 1;
-        d.dh += .5;
-    }
-    if (t1 > b2) {
-        d.dy = -1;
-        d.dh = 0;
-    }
-    if (b1 < t2){
-        d.dy = 1;
-        d.dh = 0;
-    }
-    return d;
-}
-
-float overlap(float x1, float w1, float x2, float w2)
-{
-    float l1 = x1 - w1/2;
-    float l2 = x2 - w2/2;
-    float left = l1 > l2 ? l1 : l2;
-    float r1 = x1 + w1/2;
-    float r2 = x2 + w2/2;
-    float right = r1 < r2 ? r1 : r2;
-    return right - left;
-}
-
-float box_intersection(box a, box b)
-{
-    float w = overlap(a.x, a.w, b.x, b.w);
-    float h = overlap(a.y, a.h, b.y, b.h);
-    if(w < 0 || h < 0) return 0;
-    float area = w*h;
-    return area;
-}
-
-float box_union(box a, box b)
-{
-    float i = box_intersection(a, b);
-    float u = a.w*a.h + b.w*b.h - i;
-    return u;
-}
-
-float box_iou(box a, box b)
-{
-    return box_intersection(a, b)/box_union(a, b);
-}
-
-dbox dintersect(box a, box b)
-{
-    float w = overlap(a.x, a.w, b.x, b.w);
-    float h = overlap(a.y, a.h, b.y, b.h);
-    dbox dover = derivative(a, b);
-    dbox di;
-
-    di.dw = dover.dw*h;
-    di.dx = dover.dx*h;
-    di.dh = dover.dh*w;
-    di.dy = dover.dy*w;
-    if(h < 0 || w < 0){
-        di.dx = dover.dx;
-        di.dy = dover.dy;
-    }
-    return di;
-}
-
-dbox dunion(box a, box b)
-{
-    dbox du = {0,0,0,0};;
-    float w = overlap(a.x, a.w, b.x, b.w);
-    float h = overlap(a.y, a.h, b.y, b.h);
-    if(w > 0 && h > 0){
-        dbox di = dintersect(a, b);
-        du.dw = h - di.dw;
-        du.dh = w - di.dw;
-        du.dx = -di.dx;
-        du.dy = -di.dy;
-    }
-    return du;
-}
-
-dbox diou(box a, box b)
-{
-    float u = box_union(a,b);
-    float i = box_intersection(a,b);
-    dbox di = dintersect(a,b);
-    dbox du = dunion(a,b);
-    dbox dd = {0,0,0,0};
-    if(i < 0) {
-        dd.dx = b.x - a.x;
-        dd.dy = b.y - a.y;
-        dd.dw = b.w - a.w;
-        dd.dh = b.h - a.h;
-        return dd;
-    }
-    dd.dx = 2*pow((1-(i/u)),1)*(di.dx*u - du.dx*i)/(u*u);
-    dd.dy = 2*pow((1-(i/u)),1)*(di.dy*u - du.dy*i)/(u*u);
-    dd.dw = 2*pow((1-(i/u)),1)*(di.dw*u - du.dw*i)/(u*u);
-    dd.dh = 2*pow((1-(i/u)),1)*(di.dh*u - du.dh*i)/(u*u);
-    return dd;
-}
-
-void test_box()
-{
-    box a = {1, 1, 1, 1};
-    box b = {0, 0, .5, .2};
-    int count = 0;
-    while(count++ < 300){
-        dbox d = diou(a, b);
-        printf("%f %f %f %f\n", a.x, a.y, a.w, a.h);
-        a.x += .1*d.dx;
-        a.w += .1*d.dw;
-        a.y += .1*d.dy;
-        a.h += .1*d.dh;
-        printf("inter: %f\n", box_intersection(a, b));
-        printf("union: %f\n", box_union(a, b));
-        printf("IOU: %f\n", box_iou(a, b));
-        if(d.dx==0 && d.dw==0 && d.dy==0 && d.dh==0) {
-            printf("break!!!\n");
-            break;
-        }
-    }
-}
-
-void forward_detection_layer(const detection_layer layer, network_state state)
-{
-    int in_i = 0;
-    int out_i = 0;
-    int locations = get_detection_layer_locations(layer);
+    int locations = l.side*l.side; // grid cells per image
     int i,j;
-    for(i = 0; i < layer.batch*locations; ++i){
-        int mask = (!state.truth || state.truth[out_i + layer.background + layer.classes + 2]);
-        float scale = 1;
-        if(layer.rescore) scale = state.input[in_i++];
-        else if(layer.nuisance){
-            layer.output[out_i++] = 1-state.input[in_i++];
-            scale = mask;
-        }
-        else if(layer.background) layer.output[out_i++] = scale*state.input[in_i++];
-
-        for(j = 0; j < layer.classes; ++j){
-            layer.output[out_i++] = scale*state.input[in_i++];
-        }
-        if(layer.nuisance){
-            
-        }else if(layer.background){
-            softmax_array(layer.output + out_i - layer.classes-layer.background, layer.classes+layer.background, layer.output + out_i - layer.classes-layer.background);
-            activate_array(state.input+in_i, layer.coords, LOGISTIC);
-        }
-        for(j = 0; j < layer.coords; ++j){
-            layer.output[out_i++] = mask*state.input[in_i++];
-        }
-    }
-    if(layer.does_cost){
-        *(layer.cost) = 0;
-        int size = get_detection_layer_output_size(layer) * layer.batch;
-        memset(layer.delta, 0, size * sizeof(float));
-        for(i = 0; i < layer.batch*locations; ++i){
-            int classes = layer.nuisance+layer.classes;
-            int offset = i*(classes+layer.coords);
-            for(j = offset; j < offset+classes; ++j){
-                *(layer.cost) += pow(state.truth[j] - layer.output[j], 2);
-                layer.delta[j] =  state.truth[j] - layer.output[j];
+    memcpy(l.output, state.input, l.outputs*l.batch*sizeof(float)); // output starts as a verbatim copy of the input
+    //if(l.reorg) reorg(l.output, l.w*l.h, size*l.n, l.batch, 1);
+    int b;
+    if (l.softmax){ // softmax each cell's l.classes scores in place
+        for(b = 0; b < l.batch; ++b){
+            int index = b*l.inputs; // start of image b inside l.output
+            for (i = 0; i < locations; ++i) {
+                int offset = i*l.classes; // class scores of cell i
+                softmax(l.output + index + offset, l.classes, 1,
+                        l.output + index + offset, 1);
             }
-            box truth;
-            truth.x = state.truth[j+0];
-            truth.y = state.truth[j+1];
-            truth.w = state.truth[j+2];
-            truth.h = state.truth[j+3];
-            box out;
-            out.x = layer.output[j+0];
-            out.y = layer.output[j+1];
-            out.w = layer.output[j+2];
-            out.h = layer.output[j+3];
-            if(!(truth.w*truth.h)) continue;
-            float iou = box_iou(truth, out);
-            //printf("iou: %f\n", iou);
-            *(layer.cost) += pow((1-iou), 2);
-            dbox d = diou(out, truth);
-            layer.delta[j+0] = d.dx;
-            layer.delta[j+1] = d.dy;
-            layer.delta[j+2] = d.dw;
-            layer.delta[j+3] = d.dh;
         }
     }
-    /*
-       int count = 0;
-       for(i = 0; i < layer.batch*locations; ++i){
-       for(j = 0; j < layer.classes+layer.background; ++j){
-       printf("%f, ", layer.output[count++]);
-       }
-       printf("\n");
-       for(j = 0; j < layer.coords; ++j){
-       printf("%f, ", layer.output[count++]);
-       }
-       printf("\n");
-       }
-     */
-    /*
-       if(layer.background || 1){
-       for(i = 0; i < layer.batch*locations; ++i){
-       int index = i*(layer.classes+layer.coords+layer.background);
-       for(j= 0; j < layer.classes; ++j){
-       if(state.truth[index+j+layer.background]){
-//dark_zone(layer, j, index, state);
-}
-}
-}
-}
-     */
+    if(state.train){ // everything below only runs in training
+        float avg_iou = 0; // running sums for the diagnostic printf at the end
+        float avg_cat = 0;
+        float avg_allcat = 0;
+        float avg_obj = 0;
+        float avg_anyobj = 0;
+        int count = 0; // number of cells that contained an object
+        *(l.cost) = 0;
+        int size = l.inputs * l.batch;
+        memset(l.delta, 0, size * sizeof(float)); // clear every delta before accumulating
+        for (b = 0; b < l.batch; ++b){
+            int index = b*l.inputs;
+            for (i = 0; i < locations; ++i) {
+                int truth_index = (b*locations + i)*(1+l.coords+l.classes); // truth record of this cell: [is_obj | classes | box]
+                int is_obj = state.truth[truth_index]; // float flag truncated to int
+                for (j = 0; j < l.n; ++j) { // first penalise every predictor in the cell as "no object"
+                    int p_index = index + locations*l.classes + i*l.n + j; // objectness values follow the locations*l.classes class scores
+                    l.delta[p_index] = l.noobject_scale*(0 - l.output[p_index]);
+                    *(l.cost) += l.noobject_scale*pow(l.output[p_index], 2);
+                    avg_anyobj += l.output[p_index];
+                }
+
+                int best_index = -1; // stays -1 until some predictor is selected below
+                float best_iou = 0;
+                float best_rmse = 20; // the rmse fallback only accepts boxes with rmse below this
+
+                if (!is_obj){ // cells without an object keep only the no-object terms above
+                    continue;
+                }
+
+                int class_index = index + i*l.classes;
+                for(j = 0; j < l.classes; ++j) { // squared-error class terms scaled by l.class_scale
+                    l.delta[class_index+j] = l.class_scale * (state.truth[truth_index+1+j] - l.output[class_index+j]);
+                    *(l.cost) += l.class_scale * pow(state.truth[truth_index+1+j] - l.output[class_index+j], 2);
+                    if(state.truth[truth_index + 1 + j]) avg_cat += l.output[class_index+j];
+                    avg_allcat += l.output[class_index+j];
+                }
+
+                box truth = float_to_box(state.truth + truth_index + 1 + l.classes); // box values sit after the flag and the class entries
+                truth.x /= l.side; // x,y are divided by side here, as is done for each predicted box below
+                truth.y /= l.side;
+
+                for(j = 0; j < l.n; ++j){ // pick the predictor that best matches the truth box
+                    int box_index = index + locations*(l.classes + l.n) + (i*l.n + j) * l.coords; // boxes follow the class and objectness sections, l.coords values per predictor
+                    box out = float_to_box(l.output + box_index);
+                    out.x /= l.side;
+                    out.y /= l.side;
+
+                    if (l.sqrt){ // with l.sqrt the stored w,h are squared before comparison
+                        out.w = out.w*out.w;
+                        out.h = out.h*out.h;
+                    }
+
+                    float iou  = box_iou(out, truth);
+                    //iou = 0;
+                    float rmse = box_rmse(out, truth);
+                    if(best_iou > 0 || iou > 0){ // once any predictor overlaps the truth, choose by highest IoU
+                        if(iou > best_iou){
+                            best_iou = iou;
+                            best_index = j;
+                        }
+                    }else{ // no overlap seen so far: choose by lowest rmse
+                        if(rmse < best_rmse){
+                            best_rmse = rmse;
+                            best_index = j;
+                        }
+                    }
+                }
+
+                if(l.forced){ // override: predictor 1 for truth area < .1, predictor 0 otherwise
+                    if(truth.w*truth.h < .1){
+                        best_index = 1; // NOTE(review): assumes l.n >= 2 - confirm
+                    }else{
+                        best_index = 0;
+                    }
+                }
+                if(l.random && *(state.net.seen) < 64000){ // override with a random predictor while *state.net.seen < 64000
+                    best_index = rand()%l.n;
+                }
+
+                int box_index = index + locations*(l.classes + l.n) + (i*l.n + best_index) * l.coords; // NOTE(review): best_index is still -1 here if no predictor had iou > 0 or rmse < 20 - confirm this cannot happen
+                int tbox_index = truth_index + 1 + l.classes;
+
+                box out = float_to_box(l.output + box_index);
+                out.x /= l.side;
+                out.y /= l.side;
+                if (l.sqrt) {
+                    out.w = out.w*out.w;
+                    out.h = out.h*out.h;
+                }
+                float iou  = box_iou(out, truth); // IoU of the chosen predictor
+
+                //printf("%d,", best_index);
+                int p_index = index + locations*l.classes + i*l.n + best_index;
+                *(l.cost) -= l.noobject_scale * pow(l.output[p_index], 2); // take back the no-object cost charged above for the chosen predictor
+                *(l.cost) += l.object_scale * pow(1-l.output[p_index], 2); // and charge the object cost instead
+                avg_obj += l.output[p_index];
+                l.delta[p_index] = l.object_scale * (1.-l.output[p_index]); // default objectness target is 1
+
+                if(l.rescore){ // with rescore the objectness target is the IoU
+                    l.delta[p_index] = l.object_scale * (iou - l.output[p_index]);
+                }
+
+                l.delta[box_index+0] = l.coord_scale*(state.truth[tbox_index + 0] - l.output[box_index + 0]); // coordinate deltas against the raw truth values
+                l.delta[box_index+1] = l.coord_scale*(state.truth[tbox_index + 1] - l.output[box_index + 1]);
+                l.delta[box_index+2] = l.coord_scale*(state.truth[tbox_index + 2] - l.output[box_index + 2]);
+                l.delta[box_index+3] = l.coord_scale*(state.truth[tbox_index + 3] - l.output[box_index + 3]);
+                if(l.sqrt){ // with l.sqrt the w,h targets are the square roots of the truth values
+                    l.delta[box_index+2] = l.coord_scale*(sqrt(state.truth[tbox_index + 2]) - l.output[box_index + 2]);
+                    l.delta[box_index+3] = l.coord_scale*(sqrt(state.truth[tbox_index + 3]) - l.output[box_index + 3]);
+                }
+
+                *(l.cost) += pow(1-iou, 2);
+                avg_iou += iou;
+                ++count;
+            }
+        }
+
+        if(0){ // disabled block, never executes
+            float *costs = calloc(l.batch*locations*l.n, sizeof(float));
+            for (b = 0; b < l.batch; ++b) {
+                int index = b*l.inputs;
+                for (i = 0; i < locations; ++i) {
+                    for (j = 0; j < l.n; ++j) {
+                        int p_index = index + locations*l.classes + i*l.n + j;
+                        costs[b*locations*l.n + i*l.n + j] = l.delta[p_index]*l.delta[p_index];
+                    }
+                }
+            }
+            int indexes[100];
+            top_k(costs, l.batch*locations*l.n, 100, indexes);
+            float cutoff = costs[indexes[99]];
+            for (b = 0; b < l.batch; ++b) {
+                int index = b*l.inputs;
+                for (i = 0; i < locations; ++i) {
+                    for (j = 0; j < l.n; ++j) {
+                        int p_index = index + locations*l.classes + i*l.n + j;
+                        if (l.delta[p_index]*l.delta[p_index] < cutoff) l.delta[p_index] = 0;
+                    }
+                }
+            }
+            free(costs);
+        }
+
+
+        *(l.cost) = pow(mag_array(l.delta, l.outputs * l.batch), 2); // replaces the cost accumulated above; presumably the squared L2 norm of l.delta - confirm mag_array
+
+
+        printf("Detection Avg IOU: %f, Pos Cat: %f, All Cat: %f, Pos Obj: %f, Any Obj: %f, count: %d\n", avg_iou/count, avg_cat/count, avg_allcat/(count*l.classes), avg_obj/count, avg_anyobj/(l.batch*locations*l.n), count); // NOTE(review): count is 0 when no cell held an object, making these averages divide by zero
+        //if(l.reorg) reorg(l.delta, l.w*l.h, size*l.n, l.batch, 0);
+    }
 }
 
-void backward_detection_layer(const detection_layer layer, network_state state)
+void backward_detection_layer(const detection_layer l, network_state state) // Backward pass on the host: folds l.delta into state.delta over all l.batch*l.inputs values.
 {
-    int locations = get_detection_layer_locations(layer);
-    int i,j;
-    int in_i = 0;
-    int out_i = 0;
-    for(i = 0; i < layer.batch*locations; ++i){
-        float scale = 1;
-        float latent_delta = 0;
-        if(layer.rescore) scale = state.input[in_i++];
-        else if (layer.nuisance)   state.delta[in_i++] = -layer.delta[out_i++];
-        else if (layer.background) state.delta[in_i++] = scale*layer.delta[out_i++];
-        for(j = 0; j < layer.classes; ++j){
-            latent_delta += state.input[in_i]*layer.delta[out_i];
-            state.delta[in_i++] = scale*layer.delta[out_i++];
-        }
+    axpy_cpu(l.batch*l.inputs, 1, l.delta, 1, state.delta, 1); // presumably state.delta += 1 * l.delta (BLAS axpy convention) - confirm against axpy_cpu
+}
 
-        if (layer.nuisance) {
-
-        }else if (layer.background) gradient_array(layer.output + out_i, layer.coords, LOGISTIC, layer.delta + out_i);
-        for(j = 0; j < layer.coords; ++j){
-            state.delta[in_i++] = layer.delta[out_i++];
+void get_detection_boxes(layer l, int w, int h, float thresh, float **probs, box *boxes, int only_objectness) // Decodes l.output into boxes[] and probs[][]: one entry per (cell, predictor), scaled to a w x h image; class probabilities not above thresh are written as 0.
+{
+    int i,j,n;
+    float *predictions = l.output;
+    // l.output layout read here: [l.classes scores per cell | one objectness per predictor | l.coords box values per predictor]
+    for (i = 0; i < l.side*l.side; ++i){
+        int row = i / l.side;
+        int col = i % l.side;
+        int class_index = i*l.classes; // class scores are shared by all predictors of cell i
+        for(n = 0; n < l.n; ++n){
+            int index = i*l.n + n; // slot of this predictor in boxes[] / probs[]
+            int p_index = l.side*l.side*l.classes + i*l.n + n;
+            float scale = predictions[p_index]; // objectness of this predictor
+            int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n)*l.coords; // stride l.coords (was a literal 4), matching forward_detection_layer
+            boxes[index].x = (predictions[box_index + 0] + col) / l.side * w; // cell column/row is added, then scaled from grid units to the w x h image
+            boxes[index].y = (predictions[box_index + 1] + row) / l.side * h;
+            boxes[index].w = pow(predictions[box_index + 2], (l.sqrt?2:1)) * w; // stored value is squared when l.sqrt is set
+            boxes[index].h = pow(predictions[box_index + 3], (l.sqrt?2:1)) * h;
+            for(j = 0; j < l.classes; ++j){
+                float prob = scale*predictions[class_index+j]; // class score weighted by objectness
+                probs[index][j] = (prob > thresh) ? prob : 0;
+            }
+            if(only_objectness){ // caller only wants objectness: slot 0 is overwritten with it
+                probs[index][0] = scale;
+            }
         }
-        if(layer.rescore) state.delta[in_i-layer.coords-layer.classes-layer.rescore-layer.background] = latent_delta;
     }
 }
 
 #ifdef GPU
 
-void forward_detection_layer_gpu(const detection_layer layer, network_state state)
+void forward_detection_layer_gpu(const detection_layer l, network_state state) // GPU entry point: at inference it only copies input to l.output_gpu; in training it pulls the data to the host, runs forward_detection_layer and pushes output and delta back.
 {
-    int outputs = get_detection_layer_output_size(layer);
-    float *in_cpu = calloc(layer.batch*layer.inputs, sizeof(float));
+    if(!state.train){ // NOTE(review): this path returns without applying l.softmax, unlike forward_detection_layer - confirm intended
+        copy_ongpu(l.batch*l.inputs, state.input, 1, l.output_gpu, 1);
+        return;
+    }
+
+    float *in_cpu = calloc(l.batch*l.inputs, sizeof(float)); // host staging buffer for the layer input
     float *truth_cpu = 0;
     if(state.truth){
-        truth_cpu = calloc(layer.batch*outputs, sizeof(float));
-        cuda_pull_array(state.truth, truth_cpu, layer.batch*outputs);
+        int num_truth = l.batch*l.side*l.side*(1+l.coords+l.classes); // batch * side*side cells * (1 + coords + classes) values
+        truth_cpu = calloc(num_truth, sizeof(float));
+        cuda_pull_array(state.truth, truth_cpu, num_truth);
     }
-    cuda_pull_array(state.input, in_cpu, layer.batch*layer.inputs);
-    network_state cpu_state;
+    cuda_pull_array(state.input, in_cpu, l.batch*l.inputs);
+    network_state cpu_state = state; // start from a copy so fields not overridden below (e.g. state.net, read by the CPU path) carry over
     cpu_state.train = state.train;
     cpu_state.truth = truth_cpu;
     cpu_state.input = in_cpu;
-    forward_detection_layer(layer, cpu_state);
-    cuda_push_array(layer.output_gpu, layer.output, layer.batch*outputs);
-    cuda_push_array(layer.delta_gpu, layer.delta, layer.batch*outputs);
+    forward_detection_layer(l, cpu_state); // CPU implementation fills l.output, l.delta and *l.cost
+    cuda_push_array(l.output_gpu, l.output, l.batch*l.outputs);
+    cuda_push_array(l.delta_gpu, l.delta, l.batch*l.inputs); // same element count as the output push: make_detection_layer sets l.outputs = l.inputs
     free(cpu_state.input);
     if(cpu_state.truth) free(cpu_state.truth);
 }
 
-void backward_detection_layer_gpu(detection_layer layer, network_state state)
+void backward_detection_layer_gpu(detection_layer l, network_state state) // Device-side counterpart of backward_detection_layer: no host round-trip, l.delta_gpu is combined into state.delta by axpy_ongpu.
 {
-    int outputs = get_detection_layer_output_size(layer);
-
-    float *in_cpu =    calloc(layer.batch*layer.inputs, sizeof(float));
-    float *delta_cpu = calloc(layer.batch*layer.inputs, sizeof(float));
-    float *truth_cpu = 0;
-    if(state.truth){
-        truth_cpu = calloc(layer.batch*outputs, sizeof(float));
-        cuda_pull_array(state.truth, truth_cpu, layer.batch*outputs);
-    }
-    network_state cpu_state;
-    cpu_state.train = state.train;
-    cpu_state.input = in_cpu;
-    cpu_state.truth = truth_cpu;
-    cpu_state.delta = delta_cpu;
-
-    cuda_pull_array(state.input, in_cpu, layer.batch*layer.inputs);
-    cuda_pull_array(layer.delta_gpu, layer.delta, layer.batch*outputs);
-    backward_detection_layer(layer, cpu_state);
-    cuda_push_array(state.delta, delta_cpu, layer.batch*layer.inputs);
-
-    free(in_cpu);
-    free(delta_cpu);
+    axpy_ongpu(l.batch*l.inputs, 1, l.delta_gpu, 1, state.delta, 1); // presumably state.delta += 1 * l.delta_gpu (BLAS axpy convention) - confirm against axpy_ongpu
+    //copy_ongpu(l.batch*l.inputs, l.delta_gpu, 1, state.delta, 1);
 }
 #endif
 
+void get_detection_detections(layer l, int w, int h, float thresh, detection *dets) // Decodes l.output into dets[]: one entry per (cell, predictor) holding the box scaled to a w x h image, the objectness, and class probabilities (0 when not above thresh).
+{
+	int i, j, n;
+	float *predictions = l.output;
+	// l.output layout read here: [l.classes scores per cell | one objectness per predictor | l.coords box values per predictor]
+	for (i = 0; i < l.side*l.side; ++i) {
+		int row = i / l.side;
+		int col = i % l.side;
+		int class_index = i*l.classes; // class scores are shared by all predictors of cell i
+		for (n = 0; n < l.n; ++n) {
+			int index = i*l.n + n; // slot of this predictor in dets[]
+			int p_index = l.side*l.side*l.classes + i*l.n + n;
+			float scale = predictions[p_index]; // objectness of this predictor
+			int box_index = l.side*l.side*(l.classes + l.n) + (i*l.n + n) * l.coords; // stride l.coords (was a literal 4), matching forward_detection_layer
+			box b;
+			b.x = (predictions[box_index + 0] + col) / l.side * w; // cell column/row is added, then scaled from grid units to the w x h image
+			b.y = (predictions[box_index + 1] + row) / l.side * h;
+			b.w = pow(predictions[box_index + 2], (l.sqrt ? 2 : 1)) * w; // stored value is squared when l.sqrt is set
+			b.h = pow(predictions[box_index + 3], (l.sqrt ? 2 : 1)) * h;
+			dets[index].bbox = b;
+			dets[index].objectness = scale;
+			for (j = 0; j < l.classes; ++j) {
+				float prob = scale*predictions[class_index + j]; // class score weighted by objectness
+				dets[index].prob[j] = (prob > thresh) ? prob : 0;
+			}
+		}
+	}
+}
\ No newline at end of file

--
Gitblit v1.10.0