From 028696bf15efeca3acb3db8c42a96f7b9e0f55ff Mon Sep 17 00:00:00 2001
From: iovodov <b@ovdv.ru>
Date: Thu, 03 May 2018 13:33:46 +0000
Subject: [PATCH] Output improvements for detector results: When printing detector results, output was done in random order, obfuscating results for interpreting. Now: 1. Text output includes coordinates of rects in (left,right,top,bottom in pixels) along with label and score 2. Text output is sorted by rect lefts to simplify finding appropriate rects on image 3. If several class probs are > thresh for some detection, the most probable is written first and coordinates for others are not repeated 4. Rects are imprinted in image in order by their best class prob, so most probable rects are always on top and not overlayed by less probable ones 5. Most probable label for rect is always written first Also: 6. Message about low GPU memory include required amount

---
 src/blas.c |  223 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 files changed, 220 insertions(+), 3 deletions(-)

diff --git a/src/blas.c b/src/blas.c
index 3785937..4ff0b83 100644
--- a/src/blas.c
+++ b/src/blas.c
@@ -1,5 +1,98 @@
 #include "blas.h"
-#include "math.h"
+
+#include <math.h>
+#include <assert.h>
+#include <float.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+void reorg_cpu(float *x, int out_w, int out_h, int out_c, int batch, int stride, int forward, float *out)
+{
+    int b,i,j,k;
+    int in_c = out_c/(stride*stride);
+
+	//printf("\n out_c = %d, out_w = %d, out_h = %d, stride = %d, forward = %d \n", out_c, out_w, out_h, stride, forward);
+	//printf("  in_c = %d,  in_w = %d,  in_h = %d \n", in_c, out_w*stride, out_h*stride);
+
+    for(b = 0; b < batch; ++b){
+        for(k = 0; k < out_c; ++k){
+            for(j = 0; j < out_h; ++j){
+                for(i = 0; i < out_w; ++i){
+                    int in_index  = i + out_w*(j + out_h*(k + out_c*b));
+                    int c2 = k % in_c;
+                    int offset = k / in_c;
+                    int w2 = i*stride + offset % stride;
+                    int h2 = j*stride + offset / stride;
+                    int out_index = w2 + out_w*stride*(h2 + out_h*stride*(c2 + in_c*b));
+                    if(forward) out[out_index] = x[in_index];	// used by default for forward (i.e. forward = 0)
+                    else out[in_index] = x[out_index];
+                }
+            }
+        }
+    }
+}
+
+void flatten(float *x, int size, int layers, int batch, int forward)
+{
+    float *swap = calloc(size*layers*batch, sizeof(float));
+    int i,c,b;
+    for(b = 0; b < batch; ++b){
+        for(c = 0; c < layers; ++c){
+            for(i = 0; i < size; ++i){
+                int i1 = b*layers*size + c*size + i;
+                int i2 = b*layers*size + i*layers + c;
+                if (forward) swap[i2] = x[i1];
+                else swap[i1] = x[i2];
+            }
+        }
+    }
+    memcpy(x, swap, size*layers*batch*sizeof(float));
+    free(swap);
+}
+
+void weighted_sum_cpu(float *a, float *b, float *s, int n, float *c)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        c[i] = s[i]*a[i] + (1-s[i])*(b ? b[i] : 0);
+    }
+}
+
+void weighted_delta_cpu(float *a, float *b, float *s, float *da, float *db, float *ds, int n, float *dc)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        if(da) da[i] += dc[i] * s[i];
+        if(db) db[i] += dc[i] * (1-s[i]);
+        ds[i] += dc[i] * (a[i] - b[i]);
+    }
+}
+
+void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out)
+{
+    int stride = w1/w2;
+    int sample = w2/w1;
+    assert(stride == h1/h2);
+    assert(sample == h2/h1);
+    if(stride < 1) stride = 1;
+    if(sample < 1) sample = 1;
+    int minw = (w1 < w2) ? w1 : w2;
+    int minh = (h1 < h2) ? h1 : h2;
+    int minc = (c1 < c2) ? c1 : c2;
+
+    int i,j,k,b;
+    for(b = 0; b < batch; ++b){
+        for(k = 0; k < minc; ++k){
+            for(j = 0; j < minh; ++j){
+                for(i = 0; i < minw; ++i){
+                    int out_index = i*sample + w2*(j*sample + h2*(k + c2*b));
+                    int add_index = i*stride + w1*(j*stride + h1*(k + c1*b));
+                    out[out_index] += add[add_index];
+                }
+            }
+        }
+    }
+}
 
 void mean_cpu(float *x, int batch, int filters, int spatial, float *mean)
 {
@@ -19,7 +112,7 @@
 
 void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance)
 {
-    float scale = 1./(batch * spatial);
+    float scale = 1./(batch * spatial - 1);
     int i,j,k;
     for(i = 0; i < filters; ++i){
         variance[i] = 0;
@@ -40,7 +133,7 @@
         for(f = 0; f < filters; ++f){
             for(i = 0; i < spatial; ++i){
                 int index = b*filters*spatial + f*spatial + i;
-                x[index] = (x[index] - mean[f])/(sqrt(variance[f]));
+                x[index] = (x[index] - mean[f])/(sqrt(variance[f]) + .000001f);
             }
         }
     }
@@ -76,12 +169,91 @@
     for(i = 0; i < N; ++i) X[i*INCX] *= ALPHA;
 }
 
+void fill_cpu(int N, float ALPHA, float *X, int INCX)
+{
+    int i;
+    for(i = 0; i < N; ++i) X[i*INCX] = ALPHA;
+}
+
+void deinter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+{
+    int i, j;
+    int index = 0;
+    for(j = 0; j < B; ++j) {
+        for(i = 0; i < NX; ++i){
+            if(X) X[j*NX + i] += OUT[index];
+            ++index;
+        }
+        for(i = 0; i < NY; ++i){
+            if(Y) Y[j*NY + i] += OUT[index];
+            ++index;
+        }
+    }
+}
+
+void inter_cpu(int NX, float *X, int NY, float *Y, int B, float *OUT)
+{
+    int i, j;
+    int index = 0;
+    for(j = 0; j < B; ++j) {
+        for(i = 0; i < NX; ++i){
+            OUT[index++] = X[j*NX + i];
+        }
+        for(i = 0; i < NY; ++i){
+            OUT[index++] = Y[j*NY + i];
+        }
+    }
+}
+
 void copy_cpu(int N, float *X, int INCX, float *Y, int INCY)
 {
     int i;
     for(i = 0; i < N; ++i) Y[i*INCY] = X[i*INCX];
 }
 
+void mult_add_into_cpu(int N, float *X, float *Y, float *Z)
+{
+    int i;
+    for(i = 0; i < N; ++i) Z[i] += X[i]*Y[i];
+}
+
+void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        float diff = truth[i] - pred[i];
+        float abs_val = fabs(diff);
+        if(abs_val < 1) {
+            error[i] = diff * diff;
+            delta[i] = diff;
+        }
+        else {
+            error[i] = 2*abs_val - 1;
+            delta[i] = (diff < 0) ? 1 : -1;
+        }
+    }
+}
+
+void l1_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        float diff = truth[i] - pred[i];
+        error[i] = fabs(diff);
+        delta[i] = diff > 0 ? 1 : -1;
+    }
+}
+
+void l2_cpu(int n, float *pred, float *truth, float *delta, float *error)
+{
+    int i;
+    for(i = 0; i < n; ++i){
+        float diff = truth[i] - pred[i];
+        error[i] = diff * diff;
+        delta[i] = diff;
+    }
+}
+
 float dot_cpu(int N, float *X, int INCX, float *Y, int INCY)
 {
     int i;
@@ -90,3 +262,48 @@
     return dot;
 }
 
+void softmax(float *input, int n, float temp, float *output, int stride)
+{
+    int i;
+    float sum = 0;
+    float largest = -FLT_MAX;
+    for(i = 0; i < n; ++i){
+        if(input[i*stride] > largest) largest = input[i*stride];
+    }
+    for(i = 0; i < n; ++i){
+        float e = exp(input[i*stride]/temp - largest/temp);
+        sum += e;
+        output[i*stride] = e;
+    }
+    for(i = 0; i < n; ++i){
+        output[i*stride] /= sum;
+    }
+}
+
+
+void softmax_cpu(float *input, int n, int batch, int batch_offset, int groups, int group_offset, int stride, float temp, float *output)
+{
+    int g, b;
+    for(b = 0; b < batch; ++b){
+        for(g = 0; g < groups; ++g){
+            softmax(input + b*batch_offset + g*group_offset, n, temp, output + b*batch_offset + g*group_offset, stride);
+        }
+    }
+}
+
+void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out)
+{
+	int i, j, k, b;
+	for (b = 0; b < batch; ++b) {
+		for (k = 0; k < c; ++k) {
+			for (j = 0; j < h*stride; ++j) {
+				for (i = 0; i < w*stride; ++i) {
+					int in_index = b*w*h*c + k*w*h + (j / stride)*w + i / stride;
+					int out_index = b*w*h*c*stride*stride + k*w*h*stride*stride + j*w*stride + i;
+					if (forward) out[out_index] = scale*in[in_index];
+					else in[in_index] += scale*out[out_index];
+				}
+			}
+		}
+	}
+}
\ No newline at end of file

--
Gitblit v1.10.0