From d97331b88ff3d50035b1e22c9d0eb671b61227e3 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Wed, 15 Apr 2015 07:32:32 +0000
Subject: [PATCH] level adjustment for images

---
 src/image.c               |  172 +++++++++++++++++++++++++++-
 src/detection.c           |    7 +
 src/imagenet.c            |    7 
 src/data.c                |   24 ++-
 src/crop_layer_kernels.cu |  131 +++++++++++++++++++--
 src/data.h                |    2 
 src/image.h               |    4 
 7 files changed, 312 insertions(+), 35 deletions(-)

diff --git a/src/crop_layer_kernels.cu b/src/crop_layer_kernels.cu
index 1d20d78..3e7ee95 100644
--- a/src/crop_layer_kernels.cu
+++ b/src/crop_layer_kernels.cu
@@ -13,6 +13,71 @@
     return image[x + w*(y + c*h)];
 }
 
+// Convert one RGB pixel to HSV: returns (h, s, v) with v = max(r, g, b),
+// s = (max - min) / max and h in sextant units, wrapped to be non-negative.
+// Pixels with no chroma (max == min, black included) get h = -1 and s = 0.
+__device__ float3 rgb_to_hsv_kernel(float3 rgb)
+{
+    float r = rgb.x, g = rgb.y, b = rgb.z;
+    float max = (r > g) ? ( (r > b) ? r : b) : ( (g > b) ? g : b);
+    float min = (r < g) ? ( (r < b) ? r : b) : ( (g < b) ? g : b);
+    float delta = max - min;
+    // Defaults cover the achromatic case, where hue is undefined.
+    float h = -1;
+    float s = 0;
+    // Requiring delta != 0 as well as max != 0 keeps gray pixels from
+    // evaluating 0/0 below and returning a NaN hue.
+    if(max != 0 && delta != 0){
+        s = delta/max;
+        if(r == max){
+            h = (g - b) / delta;
+        } else if (g == max) {
+            h = 2 + (b - r) / delta;
+        } else {
+            h = 4 + (r - g) / delta;
+        }
+        if (h < 0) h += 6;
+    }
+    return make_float3(h, s, max);
+}
+
+// Convert one HSV pixel back to RGB.
+// hsv.x is the hue in sextant units, hsv.y the saturation and hsv.z the
+// value. Each returned channel is clamped to [0, 1].
+__device__ float3 hsv_to_rgb_kernel(float3 hsv)
+{
+    float h = hsv.x;
+    float s = hsv.y;
+    float v = hsv.z;
+
+    // Start from gray: with zero saturation every channel equals the value
+    // and the hue is ignored.
+    float r = v, g = v, b = v;
+    if (s != 0) {
+        int sextant = (int) floorf(h);
+        float f = h - sextant;
+        float p = v*(1-s);
+        float q = v*(1-s*f);
+        float t = v*(1-s*(1-f));
+        // Sextants outside 0..4 all take the default mapping.
+        switch(sextant){
+            case 0:  r = v; g = t; b = p; break;
+            case 1:  r = q; g = v; b = p; break;
+            case 2:  r = p; g = v; b = t; break;
+            case 3:  r = p; g = q; b = v; break;
+            case 4:  r = t; g = p; b = v; break;
+            default: r = v; g = p; b = q; break;
+        }
+    }
+
+    // Inputs with s > 1 or v > 1 can yield channels outside the unit range;
+    // clamp each one into [0, 1].
+    if (r < 0) r = 0; else if (r > 1) r = 1;
+    if (g < 0) g = 0; else if (g > 1) g = 1;
+    if (b < 0) b = 0; else if (b > 1) b = 1;
+    return make_float3(r, g, b);
+}
+
 __device__ float billinear_interpolate_kernel(float *image, int w, int h, float x, float y, int c)
 {
     int ix = (int) floorf(x);
@@ -22,12 +87,36 @@
     float dy = y - iy;
 
     float val = (1-dy) * (1-dx) * get_pixel_kernel(image, w, h, ix, iy, c) + 
-                dy     * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + 
-                (1-dy) *   dx   * get_pixel_kernel(image, w, h, ix+1, iy, c) +
-                dy     *   dx   * get_pixel_kernel(image, w, h, ix+1, iy+1, c);
+        dy     * (1-dx) * get_pixel_kernel(image, w, h, ix, iy+1, c) + 
+        (1-dy) *   dx   * get_pixel_kernel(image, w, h, ix+1, iy, c) +
+        dy     *   dx   * get_pixel_kernel(image, w, h, ix+1, iy+1, c);
     return val;
 }
 
+// Apply saturation/exposure gains in HSV space to a batch of 3-channel planar
+// images, in place: one thread per pixel, channel 2 read as red and channel 0
+// as blue. Each output channel is written as clamped_rgb*scale + translate.
+__global__ void levels_image_kernel(float *image, int batch, int w, int h, float saturation, float exposure, float translate, float scale)
+{
+    int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
+    if(id >= batch * w * h) return;
+    int x = id % w;
+    int y = (id / w) % h;
+    int n = (id / w) / h;
+    // Widen before multiplying: n*h*w*3 evaluated in int can overflow.
+    image += (size_t)n * h * w * 3;
+    float *red   = image + x + w*(y + h*2);
+    float *green = image + x + w*(y + h*1);
+    float *blue  = image + x + w*(y + h*0);
+    float3 hsv = rgb_to_hsv_kernel(make_float3(*red, *green, *blue));
+    hsv.y *= saturation;
+    hsv.z *= exposure;
+    float3 rgb = hsv_to_rgb_kernel(hsv);
+    *red   = rgb.x*scale + translate;
+    *green = rgb.y*scale + translate;
+    *blue  = rgb.z*scale + translate;
+}
+
 __global__ void forward_crop_layer_kernel(float *input, int size, int c, int h, int w, int crop_height, int crop_width, int dh, int dw, int flip, float angle, float *output)
 {
     int id = (blockIdx.x + blockIdx.y*gridDim.x) * blockDim.x + threadIdx.x;
@@ -63,26 +152,44 @@
     int dw = rand()%(layer.w - layer.crop_width + 1);
     float radians = layer.angle*3.14159/180.;
     float angle = 2*radians*rand_uniform() - radians;
+
+    float saturation = rand_uniform() + 1;
+    if(rand_uniform() > .5) saturation = 1./saturation;
+    float exposure = rand_uniform() + 1;
+    if(rand_uniform() > .5) exposure = 1./exposure;
+
+    float scale = 2;
+    float translate = -1;
+
     if(!state.train){
         angle = 0;
         flip = 0;
         dh = (layer.h - layer.crop_height)/2;
         dw = (layer.w - layer.crop_width)/2;
+        saturation = 1;
+        exposure = 1;
     }
-    int size = layer.batch*layer.c*layer.crop_width*layer.crop_height;
 
-    dim3 dimBlock(BLOCK, 1, 1);
-    dim3 dimGrid((size-1)/BLOCK + 1, 1, 1);
+    int size = layer.batch * layer.w * layer.h;
+
+    levels_image_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, layer.batch, layer.w, layer.h, saturation, exposure, translate, scale);
+    check_error(cudaPeekAtLastError());
+    
+    size = layer.batch*layer.c*layer.crop_width*layer.crop_height;
 
     forward_crop_layer_kernel<<<cuda_gridsize(size), BLOCK>>>(state.input, size, layer.c, layer.h, layer.w,
-                        layer.crop_height, layer.crop_width, dh, dw, flip, angle, layer.output_gpu);
+            layer.crop_height, layer.crop_width, dh, dw, flip, angle, layer.output_gpu);
     check_error(cudaPeekAtLastError());
 
 /*
-    cuda_pull_array(layer.output_gpu, layer.output, size);
-    image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 14*(size/layer.batch));
-    show_image(im, "cropped");
-    cvWaitKey(0);
-    */
+       cuda_pull_array(layer.output_gpu, layer.output, size);
+       image im = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 0*(size/layer.batch));
+       image im2 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 1*(size/layer.batch));
+       image im3 = float_to_image(layer.crop_width, layer.crop_height, layer.c, layer.output + 2*(size/layer.batch));
+       show_image(im, "cropped");
+       show_image(im2, "cropped2");
+       show_image(im3, "cropped3");
+       cvWaitKey(0);
+       */
 }
 
diff --git a/src/data.c b/src/data.c
index 5a6a602..012d7cf 100644
--- a/src/data.c
+++ b/src/data.c
@@ -6,6 +6,8 @@
 #include <stdlib.h>
 #include <string.h>
 
+unsigned int data_seed;
+
 struct load_args{
     char **paths;
     int n;
@@ -40,7 +42,7 @@
     char **random_paths = calloc(n, sizeof(char*));
     int i;
     for(i = 0; i < n; ++i){
-        int index = rand()%m;
+        int index = rand_r(&data_seed)%m;
         random_paths[i] = paths[index];
         if(i == 0) printf("%s\n", paths[index]);
     }
@@ -57,8 +59,6 @@
 
     for(i = 0; i < n; ++i){
         image im = load_image_color(paths[i], w, h);
-        translate_image(im, -128);
-        scale_image(im, 1./128);
         X.vals[i] = im.data;
         X.cols = im.h*im.w*im.c;
     }
@@ -102,7 +102,7 @@
     int i;
     for(i = 0; i < n; ++i){
         box swap = b[i];
-        int index = rand()%n;
+        int index = rand_r(&data_seed)%n;
         b[i] = b[index];
         b[index] = swap;
     }
@@ -294,8 +294,12 @@
     d.y = make_matrix(n, k);
     for(i = 0; i < n; ++i){
         image orig = load_image_color(random_paths[i], 0, 0);
-        translate_image(orig, -128);
-        scale_image(orig, 1./128);
+        float exposure = rand_uniform()+1;
+        if(rand_uniform() > .5) exposure = 1/exposure;
+
+        float saturation = rand_uniform()+1;
+        if(rand_uniform() > .5) saturation = 1/saturation;
+
         int oh = orig.h;
         int ow = orig.w;
 
@@ -320,7 +324,7 @@
         orig = rot;
         */
 
-        int flip = rand()%2;
+        int flip = rand_r(&data_seed)%2;
         image cropped = crop_image(orig, pleft, ptop, swidth, sheight);
         float dx = ((float)pleft/ow)/sx;
         float dy = ((float)ptop /oh)/sy;
@@ -339,7 +343,7 @@
 
 void *load_detection_thread(void *ptr)
 {
-    printf("Loading data: %d\n", rand());
+    printf("Loading data: %d\n", rand_r(&data_seed));
     struct load_args a = *(struct load_args*)ptr;
     *a.d = load_data_detection_jitter_random(a.n, a.paths, a.m, a.classes, a.w, a.h, a.num_boxes, a.background);
     free(ptr);
@@ -453,7 +457,7 @@
 {
     int j;
     for(j = 0; j < n; ++j){
-        int index = rand()%d.X.rows;
+        int index = rand_r(&data_seed)%d.X.rows;
         memcpy(X+j*d.X.cols, d.X.vals[index], d.X.cols*sizeof(float));
         memcpy(y+j*d.y.cols, d.y.vals[index], d.y.cols*sizeof(float));
     }
@@ -507,7 +511,7 @@
 {
     int i;
     for(i = d.X.rows-1; i > 0; --i){
-        int index = rand()%i;
+        int index = rand_r(&data_seed)%i;
         float *swap = d.X.vals[index];
         d.X.vals[index] = d.X.vals[i];
         d.X.vals[i] = swap;
diff --git a/src/data.h b/src/data.h
index e0d84d2..8e3e1d9 100644
--- a/src/data.h
+++ b/src/data.h
@@ -5,6 +5,8 @@
 #include "matrix.h"
 #include "list.h"
 
+extern unsigned int data_seed;
+
 static inline float distance_from_edge(int x, int max)
 {
     int dx = (max/2) - x;
diff --git a/src/detection.c b/src/detection.c
index fec3980..eea6136 100644
--- a/src/detection.c
+++ b/src/detection.c
@@ -57,6 +57,7 @@
 void train_detection(char *cfgfile, char *weightfile)
 {
     srand(time(0));
+    data_seed = time(0);
     int imgnet = 0;
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
@@ -94,7 +95,11 @@
 
 /*
            image im = float_to_image(net.w, net.h, 3, train.X.vals[114]);
-           draw_detection(im, train.y.vals[114], 7);
+           image copy = copy_image(im);
+           translate_image(copy, 1);
+           scale_image(copy, .5);
+           draw_detection(copy, train.y.vals[114], 7);
+           free_image(copy);
            */
 
         printf("Loaded: %lf seconds\n", sec(clock()-time));
diff --git a/src/image.c b/src/image.c
index d3fb61e..bf6ce6a 100644
--- a/src/image.c
+++ b/src/image.c
@@ -20,7 +20,7 @@
 
 void draw_box(image a, int x1, int y1, int x2, int y2, float r, float g, float b)
 {
-    normalize_image(a);
+    //normalize_image(a);
     int i;
     if(x1 < 0) x1 = 0;
     if(x1 >= a.w) x1 = a.w-1;
@@ -113,6 +113,15 @@
     return dest;
 }
 
+// Clamp every value of im into the range [0, 1], in place.
+void constrain_image(image im)
+{
+    int k, total = im.w*im.h*im.c;
+    for(k = 0; k < total; ++k){
+        im.data[k] = (im.data[k] < 0) ? 0 : ((im.data[k] > 1) ? 1 : im.data[k]);
+    }
+}
+
 void normalize_image(image p)
 {
     float *min = calloc(p.c, sizeof(float));
@@ -154,7 +163,7 @@
 {
     int x,y,k;
     image copy = copy_image(p);
-    normalize_image(copy);
+    //normalize_image(copy);
 
     char buff[256];
     //sprintf(buff, "%s (%d)", name, windows);
@@ -193,7 +202,7 @@
 {
     int x,y,k;
     image copy = copy_image(p);
-    normalize_image(copy);
+    //normalize_image(copy);
 
     char buff[256];
     //sprintf(buff, "%s (%d)", name, windows);
@@ -300,7 +309,7 @@
     for(k= 0; k < c; ++k){
         for(i = 0; i < h; ++i){
             for(j = 0; j < w; ++j){
-                out.data[count++] = data[i*step + j*c + k];
+                out.data[count++] = data[i*step + j*c + k]/255.;
             }
         }
     }
@@ -327,6 +336,94 @@
     return cropped;
 }
 
+float three_way_max(float a, float b, float c)
+{
+    return (a > b && a > c) ? a : ((b > c) ? b : c);  // largest of the three
+}
+
+float three_way_min(float a, float b, float c)
+{
+    return (a < b && a < c) ? a : ((b < c) ? b : c);  // smallest of the three
+}
+
+// http://www.cs.rit.edu/~ncs/color/t_convert.html
+// Convert a 3-channel image to HSV in place. Reads red from channel 2, green
+// from channel 1 and blue from channel 0; writes hue (sextant units) to
+// channel 0, saturation to 1 and value to 2. Achromatic pixels get hue -1.
+void rgb_to_hsv(image im)
+{
+    assert(im.c == 3);
+    int i, j;
+    float r, g, b, h, s, v;
+    for(j = 0; j < im.h; ++j){
+        for(i = 0; i < im.w; ++i){
+            r = get_pixel(im, i , j, 2);
+            g = get_pixel(im, i , j, 1);
+            b = get_pixel(im, i , j, 0);
+            v = three_way_max(r,g,b);
+            float delta = v - three_way_min(r,g,b);
+            // Hue is undefined without chroma; testing delta as well as the
+            // max keeps gray pixels from computing 0/0 and storing a NaN.
+            h = -1; s = 0;
+            if(v != 0 && delta != 0){
+                s = delta/v;
+                if(r == v){
+                    h = (g - b) / delta;
+                } else if (g == v) {
+                    h = 2 + (b - r) / delta;
+                } else {
+                    h = 4 + (r - g) / delta;
+                }
+                if (h < 0) h += 6;
+            }
+            set_pixel(im, i, j, 0, h);
+            set_pixel(im, i, j, 1, s);
+            set_pixel(im, i, j, 2, v);
+        }
+    }
+}
+
+// Convert a 3-channel HSV image back to RGB in place. Reads hue (sextant
+// units) from channel 0, saturation from channel 1 and value from channel 2;
+// writes red to channel 2, green to channel 1 and blue to channel 0.
+// Results are not clamped here.
+void hsv_to_rgb(image im)
+{
+    assert(im.c == 3);
+    int row, col;
+    for(row = 0; row < im.h; ++row){
+        for(col = 0; col < im.w; ++col){
+            float h = get_pixel(im, col, row, 0);
+            float s = get_pixel(im, col, row, 1);
+            float v = get_pixel(im, col, row, 2);
+
+            // With zero saturation the pixel is gray and hue is ignored.
+            float r = v, g = v, b = v;
+            if (s != 0) {
+                int sextant = floor(h);
+                float f = h - sextant;
+                float p = v*(1-s);
+                float q = v*(1-s*f);
+                float t = v*(1-s*(1-f));
+                // Sextants outside 0..4 all take the default mapping.
+                switch(sextant){
+                    case 0:  r = v; g = t; b = p; break;
+                    case 1:  r = q; g = v; b = p; break;
+                    case 2:  r = p; g = v; b = t; break;
+                    case 3:  r = p; g = q; b = v; break;
+                    case 4:  r = t; g = p; b = v; break;
+                    default: r = v; g = p; b = q; break;
+                }
+            }
+
+            // Red goes to channel 2 and blue to channel 0.
+            set_pixel(im, col, row, 2, r);
+            set_pixel(im, col, row, 1, g);
+            set_pixel(im, col, row, 0, b);
+        }
+    }
+}
+
 image grayscale_image(image im)
 {
     assert(im.c == 3);
@@ -354,7 +451,7 @@
         for(j = 0; j < fore.h; ++j){
             for(i = 0; i < fore.w; ++i){
                 float val = alpha * get_pixel(fore, i, j, k) + 
-                            (1 - alpha)* get_pixel(back, i, j, k);
+                    (1 - alpha)* get_pixel(back, i, j, k);
                 set_pixel(blend, i, j, k, val);
             }
         }
@@ -362,18 +459,59 @@
     return blend;
 }
 
+// Multiply every pixel of channel c of im by v, in place, visiting the
+// channel row by row through get_pixel/set_pixel.
+void scale_image_channel(image im, int c, float v)
+{
+    int row, col;
+    for(row = 0; row < im.h; ++row){
+        for(col = 0; col < im.w; ++col){
+            set_pixel(im, col, row, c, v*get_pixel(im, col, row, c));
+        }
+    }
+}
+
+// Scale the HSV saturation of im by sat, in place, and clamp to [0, 1].
+// Implemented as saturate_exposure_image with an exposure gain of 1, i.e.
+// the value channel is multiplied by 1 on the way through.
+void saturate_image(image im, float sat)
+{
+    saturate_exposure_image(im, sat, 1);
+}
+
+// Scale the HSV value of im by sat (here the exposure gain, despite the
+// name), in place, and clamp to [0, 1]. Implemented as
+// saturate_exposure_image with a saturation gain of 1.
+void exposure_image(image im, float sat)
+{
+    saturate_exposure_image(im, 1, sat);
+}
+
+void saturate_exposure_image(image im, float sat, float exposure)
+{
+    rgb_to_hsv(im);                        // channels now hold h, s, v
+    scale_image_channel(im, 1, sat);       // saturation gain
+    scale_image_channel(im, 2, exposure);  // value (exposure) gain
+    hsv_to_rgb(im);                        // back to RGB, in place
+    constrain_image(im);                   // clamp every value to [0, 1]
+}
+
+/*
 image saturate_image(image im, float sat)
 {
     image gray = grayscale_image(im);
     image blend = blend_image(im, gray, sat);
     free_image(gray);
+    constrain_image(blend);
     return blend;
 }
 
 image brightness_image(image im, float b)
 {
     image bright = make_image(im.w, im.h, im.c);
+    return bright;
 }
+*/
 
 float billinear_interpolate(image im, float x, float y, int c)
 {
@@ -413,7 +551,6 @@
 void test_resize(char *filename)
 {
     image im = load_image(filename, 0,0);
-    translate_image(im, -128);
     image small = resize_image(im, 65, 63);
     image big = resize_image(im, 513, 512);
     image crop = crop_image(im, 50, 10, 100, 100);
@@ -422,12 +559,29 @@
     image rot2 = rotate_image(big, 3.14159265/2.);
     image test = rotate_image(im, .6);
     image gray = grayscale_image(im);
-    image sat = saturate_image(im, 2);
-    image sat2 = saturate_image(im, .5);
+
+    image sat2 = copy_image(im);
+    saturate_image(sat2, 2);
+    exposure_image(sat2, 2);
+
+    image sat5 = copy_image(im);
+    saturate_image(sat5, 2);
+    exposure_image(sat5, .5);
+
+    image exp2 = copy_image(im);
+    saturate_image(exp2, .5);
+    exposure_image(exp2, 2);
+
+    image exp5 = copy_image(im);
+    saturate_image(exp5, .5);
+    exposure_image(exp5, .5);
+
     show_image(im, "original");
     show_image(gray, "gray");
-    show_image(sat, "sat");
     show_image(sat2, "sat2");
+    show_image(sat5, "sat5");
+    show_image(exp2, "exp2");
+    show_image(exp5, "exp5");
     /*
        show_image(small, "smaller");
        show_image(big, "bigger");
diff --git a/src/image.h b/src/image.h
index 66262d8..0cffc7b 100644
--- a/src/image.h
+++ b/src/image.h
@@ -22,6 +22,10 @@
 void normalize_image(image p);
 image rotate_image(image m, float rad);
 void embed_image(image source, image dest, int dx, int dy);
+void saturate_image(image im, float sat);
+void exposure_image(image im, float sat);
+void saturate_exposure_image(image im, float sat, float exposure);
+void hsv_to_rgb(image im);
 
 image collapse_image_layers(image source, int border);
 image collapse_images_horz(image *ims, int n);
diff --git a/src/imagenet.c b/src/imagenet.c
index 9e99330..906dbd4 100644
--- a/src/imagenet.c
+++ b/src/imagenet.c
@@ -4,8 +4,9 @@
 
 void train_imagenet(char *cfgfile, char *weightfile)
 {
-    float avg_loss = -1;
+    data_seed = time(0);
     srand(time(0));
+    float avg_loss = -1;
     char *base = basecfg(cfgfile);
     printf("%s\n", base);
     network net = parse_network_cfg(cfgfile);
@@ -116,8 +117,8 @@
         fgets(filename, 256, stdin);
         strtok(filename, "\n");
         image im = load_image_color(filename, 256, 256);
-        translate_image(im, -128);
-        scale_image(im, 1/128.);
+        scale_image(im, 2.);
+        translate_image(im, -1.);
         printf("%d %d %d\n", im.h, im.w, im.c);
         float *X = im.data;
         time=clock();

--
Gitblit v1.10.0