From 880cf187d87c904f5fe574802ecff99118643f2d Mon Sep 17 00:00:00 2001
From: AlexeyAB <alexeyab84@gmail.com>
Date: Fri, 09 Mar 2018 16:44:46 +0000
Subject: [PATCH] Fixed multi-GPU training for Tensor Cores

---
 src/utils.c |  318 ++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 292 insertions(+), 26 deletions(-)

diff --git a/src/utils.c b/src/utils.c
index 6fb0e43..d6bdbf6 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -2,12 +2,121 @@
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
-#include <unistd.h>
+#include <assert.h>
 #include <float.h>
 #include <limits.h>
-
+#ifdef WIN32
+#include "unistd.h"
+#else
+#include <unistd.h>
+#endif
 #include "utils.h"
 
+#pragma warning(disable: 4996)
+
+int *read_map(char *filename) // parses one int per line of `filename` into a heap array that the caller must free
+{
+    int n = 0; // number of entries stored so far
+    int *map = 0;
+    char *str;
+    FILE *file = fopen(filename, "r");
+    if(!file) file_error(filename);
+    for(; (str=fgetl(file)); free(str)){ // release each line once parsed (assumes fgetl hands back a heap buffer - confirm)
+        map = realloc(map, (n+1)*sizeof(int));
+        map[n++] = atoi(str);
+    }
+    fclose(file); // the stream is only needed while reading
+    return map; // stays NULL when the file has no lines
+}
+
+void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections) // shuffles arr (n elements of `size` bytes) separately inside each of `sections` contiguous slices
+{
+    size_t i;
+    for(i = 0; i < sections; ++i){
+        size_t start = n*i/sections;   // first element of slice i
+        size_t end = n*(i+1)/sections; // one past the last element of slice i
+        size_t num = end-start;
+        shuffle((char*)arr+(start*size), num, size); // elements never leave their own slice
+    }
+}
+
+void shuffle(void *arr, size_t n, size_t size) // in-place random permutation of n elements of `size` bytes each, driven by rand()
+{
+    void *swp = calloc(1, size); // one-element scratch space for the swap
+    for(size_t i = 0; i+1 < n; ++i){ // written as i+1 < n so that n == 0 cannot wrap around to SIZE_MAX
+        size_t j = i + rand()/(RAND_MAX / (n-i)+1); // pick j in [i, n-1]
+        memcpy(swp, (char*)arr+(j*size), size);
+        memcpy((char*)arr+(j*size), (char*)arr+(i*size), size);
+        memcpy((char*)arr+(i*size), swp, size);
+    }
+    free(swp); // scratch buffer is private to this call
+}
+
+void del_arg(int argc, char **argv, int index) // removes argv[index] by shifting the later entries down one slot
+{
+    int i;
+    for(i = index; i < argc-1; ++i) argv[i] = argv[i+1];
+    argv[i] = 0; // argc is passed by value and stays unchanged; the freed slot is marked with NULL instead
+}
+
+int find_arg(int argc, char* argv[], char *arg) // returns 1 and removes the entry if `arg` occurs in argv, otherwise returns 0
+{
+    int i;
+    for(i = 0; i < argc; ++i) {
+        if(!argv[i]) continue; // slots cleared by del_arg are NULL
+        if(0==strcmp(argv[i], arg)) {
+            del_arg(argc, argv, i);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+int find_int_arg(int argc, char **argv, char *arg, int def) // value following flag `arg`, parsed with atoi; `def` when the flag or its value is missing
+{
+    int i;
+    for(i = 0; i < argc-1; ++i){ // stop one short so argv[i+1] stays inside argv
+        if(!argv[i] || !argv[i+1]) continue; // del_arg leaves NULL slots behind; never hand one to strcmp or atoi
+        if(0==strcmp(argv[i], arg)){
+            def = atoi(argv[i+1]);
+            del_arg(argc, argv, i); // drop the flag...
+            del_arg(argc, argv, i); // ...then its value, which has shifted into slot i
+            break;
+        }
+    }
+    return def;
+}
+
+float find_float_arg(int argc, char **argv, char *arg, float def) // value following flag `arg`, parsed with atof; `def` when the flag or its value is missing
+{
+    int i;
+    for(i = 0; i < argc-1; ++i){ // stop one short so argv[i+1] stays inside argv
+        if(!argv[i] || !argv[i+1]) continue; // del_arg leaves NULL slots behind; never hand one to strcmp or atof
+        if(0==strcmp(argv[i], arg)){
+            def = atof(argv[i+1]);
+            del_arg(argc, argv, i); // drop the flag...
+            del_arg(argc, argv, i); // ...then its value, which has shifted into slot i
+            break;
+        }
+    }
+    return def;
+}
+
+char *find_char_arg(int argc, char **argv, char *arg, char *def) // returns the argv entry that follows flag `arg` (not a copy), or `def` if the flag is absent
+{
+    int i;
+    for(i = 0; i < argc-1; ++i){ // stops one short so argv[i+1] stays inside argv
+        if(!argv[i]) continue; // slots cleared by del_arg are NULL
+        if(0==strcmp(argv[i], arg)){
+            def = argv[i+1]; // NOTE(review): this is NULL when the flag is the last remaining argument
+            del_arg(argc, argv, i); // drops the flag...
+            del_arg(argc, argv, i); // ...then its value, which has shifted into slot i
+            break;
+        }
+    }
+    return def;
+}
+
 
 char *basecfg(char *cfgfile)
 {
@@ -17,9 +126,8 @@
     {
         c = next+1;
     }
+	if(!next) while ((next = strchr(c, '\\'))) { c = next + 1; }
     c = copy_string(c);
-    next = strchr(c, '_');
-    if (next) *next = 0;
     next = strchr(c, '.');
     if (next) *next = 0;
     return c;
@@ -41,28 +149,27 @@
     for(i =0 ; i < M; ++i){
         printf("%d ", i+1);
         for(j = 0; j < N; ++j){
-            printf("%10.6f, ", A[i*N+j]);
+            printf("%2.4f, ", A[i*N+j]);
         }
         printf("\n");
     }
     printf("\n");
 }
 
-
-char *find_replace(char *str, char *orig, char *rep)
+void find_replace(char *str, char *orig, char *rep, char *output) // writes str into output with the first occurrence of `orig` replaced by `rep`
 {
-    static char buffer[4096];
+    char buffer[4096] = {0}; // scratch copy of str; output's capacity remains the caller's responsibility
     char *p;
 
-    if(!(p = strstr(str, orig)))  // Is 'orig' even in 'str'?
-        return str;
+    snprintf(buffer, sizeof(buffer), "%s", str); // bounded: an over-long str is truncated rather than overrunning buffer
+    if(!(p = strstr(buffer, orig))){  // Is 'orig' even in 'str'?
+        sprintf(output, "%s", str);
+        return;
+    }
 
-    strncpy(buffer, str, p-str); // Copy characters from 'str' start to 'orig' st$
-    buffer[p-str] = '\0';
+    *p = '\0'; // cut buffer just before the match so it holds only the prefix
 
-    sprintf(buffer+(p-str), "%s%s", rep, p+strlen(orig));
-
-    return buffer;
+    sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig)); // prefix + replacement + text after the match
 }
 
 float sec(clock_t clocks)
@@ -73,11 +180,11 @@
 void top_k(float *a, int n, int k, int *index)
 {
     int i,j;
-    for(j = 0; j < k; ++j) index[j] = 0;
+    for(j = 0; j < k; ++j) index[j] = -1;
     for(i = 0; i < n; ++i){
         int curr = i;
         for(j = 0; j < k; ++j){
-            if(a[curr] > a[index[j]]){
+            if((index[j] < 0) || a[curr] > a[index[j]]){
                 int swap = curr;
                 curr = index[j];
                 index[j] = swap;
@@ -89,7 +196,8 @@
 void error(const char *s)
 {
     perror(s);
-    exit(0);
+    assert(0); // aborts here whenever assertions are enabled (NDEBUG not defined)
+    exit(-1);  // reached only when assert() is compiled out; reports failure instead of the former exit(0)
 }
 
 void malloc_error()
@@ -126,7 +234,7 @@
     size_t offset = 0;
     for(i = 0; i < len; ++i){
         char c = s[i];
-        if(c==' '||c=='\t'||c=='\n') ++offset;
+        if(c==' '||c=='\t'||c=='\n'||c =='\r') ++offset;
         else s[i-offset] = c;
     }
     s[len-offset] = '\0';
@@ -145,6 +253,13 @@
     s[len-offset] = '\0';
 }
 
+void free_ptrs(void **ptrs, int n) // frees ptrs[0..n-1] and then the pointer array itself
+{
+    int i;
+    for(i = 0; i < n; ++i) free(ptrs[i]);
+    free(ptrs); // ptrs must not be used by the caller afterwards
+}
+
 char *fgetl(FILE *fp)
 {
     if(feof(fp)) return 0;
@@ -171,11 +286,48 @@
         fgets(&line[curr], readsize, fp);
         curr = strlen(line);
     }
-    if(line[curr-1] == '\n') line[curr-1] = '\0';
+    if(line[curr-2] == 0x0d) line[curr-2] = 0x00;
+    if(line[curr-1] == 0x0a) line[curr-1] = 0x00;
 
     return line;
 }
 
+int read_int(int fd) // reads one native-layout int from fd; returns -1 when read() reports end-of-file or an error
+{
+    int n = 0;
+    int next = read(fd, &n, sizeof(int)); // NOTE(review): a short read (0 < next < sizeof(int)) is not detected
+    if(next <= 0) return -1; // NOTE(review): a stored value of -1 cannot be told apart from this failure code
+    return n;
+}
+
+void write_int(int fd, int n) // writes n to fd in native layout; calls error() if write() fails
+{
+    int next = write(fd, &n, sizeof(int));
+    if(next <= 0) error("write failed"); // name the operation that actually failed
+}
+
+int read_all_fail(int fd, char *buffer, size_t bytes) // reads exactly `bytes` bytes into buffer; returns 0 on success, 1 on end-of-file or error
+{
+    size_t n = 0; // bytes received so far
+    while(n < bytes){ // read() may deliver fewer bytes than requested, so keep asking for the remainder
+        int next = read(fd, buffer + n, bytes-n);
+        if(next <= 0) return 1;
+        n += next;
+    }
+    return 0;
+}
+
+int write_all_fail(int fd, char *buffer, size_t bytes) // writes exactly `bytes` bytes from buffer; returns 0 on success, 1 on failure
+{
+    size_t n = 0; // bytes sent so far
+    while(n < bytes){ // write() may accept fewer bytes than requested, so keep sending the remainder
+        int next = write(fd, buffer + n, bytes-n); // signed, so write()'s -1 error return is caught below
+        if(next <= 0) return 1;
+        n += next;
+    }
+    return 0;
+}
+
 void read_all(int fd, char *buffer, size_t bytes)
 {
     size_t n = 0;
@@ -266,6 +418,28 @@
     return sum_array(a,n)/n;
 }
 
+void mean_arrays(float **a, int n, int els, float *avg) // avg[i] = mean of a[0][i] .. a[n-1][i] for every i in [0, els)
+{
+    int i;
+    int j;
+    memset(avg, 0, els*sizeof(float)); // avg doubles as the running sum
+    for(j = 0; j < n; ++j){
+        for(i = 0; i < els; ++i){
+            avg[i] += a[j][i];
+        }
+    }
+    for(i = 0; i < els; ++i){
+        avg[i] /= n; // NOTE(review): n == 0 is not guarded
+    }
+}
+
+void print_statistics(float *a, int n) // prints the mse_array, mean_array and variance_array results for a[0..n-1] on one line
+{
+    float m = mean_array(a, n);
+    float v = variance_array(a, n);
+    printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), m, v);
+}
+
 float variance_array(float *a, int n)
 {
     int i;
@@ -276,13 +450,28 @@
     return variance;
 }
 
-float constrain(float a, float max)
+int constrain_int(int a, int min, int max) // clamps a into [min, max]
 {
-    if(a > abs(max)) return abs(max);
-    if(a < -abs(max)) return -abs(max);
+    if (a < min) return min; // checked first, so min wins if the bounds are reversed and a lies below it
+    if (a > max) return max;
     return a;
 }
 
+float constrain(float min, float max, float a) // clamps a into [min, max]; the value is the LAST argument here, unlike constrain_int where it comes first
+{
+    if (a < min) return min;
+    if (a > max) return max;
+    return a;
+}
+
+float dist_array(float *a, float *b, int n, int sub) // Euclidean distance between a and b computed over indices 0, sub, 2*sub, ... below n
+{
+    int i;
+    float sum = 0; // running sum of squared differences
+    for(i = 0; i < n; i += sub) sum += pow(a[i]-b[i], 2); // NOTE(review): sub must be positive or the loop does not advance
+    return sqrt(sum);
+}
+
 float mse_array(float *a, int n)
 {
     int i;
@@ -329,6 +518,19 @@
     }
 }
 
+int sample_array(float *a, int n) // draws an index in [0, n), treating a[0..n-1] as weights
+{
+    float sum = sum_array(a, n);
+    scale_array(a, n, 1./sum); // presumably rescales a in place so it sums to 1 - confirm; NOTE(review): sum == 0 is not guarded
+    float r = rand_uniform(0, 1);
+    int i;
+    for(i = 0; i < n; ++i){
+        r = r - a[i]; // subtract successive entries until the draw is used up
+        if (r <= 0) return i;
+    }
+    return n-1; // r was still positive after every entry: fall back to the last index
+}
+
 int max_index(float *a, int n)
 {
     if(n <= 0) return -1;
@@ -343,8 +545,18 @@
     return max_i;
 }
 
+int rand_int(int min, int max) // random int in [min, max], both ends included, drawn from rand()
+{
+    if (max < min){ // tolerate reversed bounds by swapping them
+        int s = min;
+        min = max;
+        max = s;
+    }
+    int r = (rand()%(max - min + 1)) + min; // NOTE(review): the modulo favours low values slightly unless the span divides RAND_MAX+1
+    return r;
+}
+
 // From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
-#define TWO_PI 6.2831853071795864769252866
 float rand_normal()
 {
     static int haveSpare = 0;
@@ -377,9 +589,34 @@
    }
  */
 
-float rand_uniform()
+size_t rand_size_t() // pseudo-random size_t assembled from the low byte of eight rand() calls
 {
-    return (float)rand()/RAND_MAX;
+    return  ((unsigned long long)(rand()&0xff) << 56) | // widen before shifting: a shift >= the width of a 32-bit size_t is undefined
+            ((unsigned long long)(rand()&0xff) << 48) |
+            ((unsigned long long)(rand()&0xff) << 40) |
+            ((unsigned long long)(rand()&0xff) << 32) |
+            ((unsigned long long)(rand()&0xff) << 24) |
+            ((unsigned long long)(rand()&0xff) << 16) |
+            ((unsigned long long)(rand()&0xff) << 8) |
+            ((unsigned long long)(rand()&0xff) << 0); // the conversion to size_t on return keeps the low bits
+}
+
+float rand_uniform(float min, float max) // uniform float between min and max, drawn from rand()
+{
+    if(max < min){ // tolerate reversed bounds by swapping them
+        float swap = min;
+        min = max;
+        max = swap;
+    }
+    return ((float)rand()/RAND_MAX * (max - min)) + min; // rand()/RAND_MAX lies in [0, 1], then is stretched onto the range
+	//return (random_float() * (max - min)) + min;
+}
+
+float rand_scale(float s) // returns a random factor from rand_uniform_strong(1, s), or its reciprocal
+{
+    float scale = rand_uniform_strong(1, s);
+    if(random_gen()%2) return scale; // odd draw: use the factor as is
+    return 1./scale;                 // even draw: use its reciprocal
 }
 
 float **one_hot_encode(float *a, int n, int k)
@@ -394,3 +631,32 @@
     return t;
 }
 
+unsigned int random_gen() // one pseudo-random unsigned int: from rand_s() when WIN32 is defined, from rand() otherwise
+{
+	unsigned int rnd = 0;
+#ifdef WIN32
+	rand_s(&rnd); // NOTE(review): the rand_s() return code is ignored
+#else
+	rnd = rand(); // value lies in [0, RAND_MAX]
+#endif
+	return rnd;
+}
+
+float random_float() // random_gen() divided by the largest value its source can produce
+{
+#ifdef WIN32
+	return ((float)random_gen() / (float)UINT_MAX); // WIN32 branch of random_gen() fills a full unsigned int
+#else
+	return ((float)random_gen() / (float)RAND_MAX); // non-WIN32 branch of random_gen() returns rand(), capped at RAND_MAX
+#endif
+}
+
+float rand_uniform_strong(float min, float max) // uniform float between min and max, drawn from random_float()
+{
+	if (max < min) { // tolerate reversed bounds by swapping them
+		float swap = min;
+		min = max;
+		max = swap;
+	}
+	return (random_float() * (max - min)) + min;
+}
\ No newline at end of file

--
Gitblit v1.10.0