From a360f694093f7748e3232f4ed74d40446d735fc3 Mon Sep 17 00:00:00 2001
From: Alexey <AlexeyAB@users.noreply.github.com>
Date: Wed, 01 Mar 2017 12:29:50 +0000
Subject: [PATCH] Readme.md - When should I stop training

---
 src/utils.c |  291 +++++++++++++++++++++++++++++++++++++++++++++++++++++----
 1 files changed, 269 insertions(+), 22 deletions(-)

diff --git a/src/utils.c b/src/utils.c
index 1db8101..41e71d0 100644
--- a/src/utils.c
+++ b/src/utils.c
@@ -2,12 +2,132 @@
 #include <stdlib.h>
 #include <string.h>
 #include <math.h>
-#include <unistd.h>
+#include <assert.h>
+#include "unistd.h"
 #include <float.h>
 #include <limits.h>
 
 #include "utils.h"
 
+#pragma warning(disable: 4996)
+
+int *read_map(char *filename)   /* loads one integer per line of the file into a heap array */
+{
+    int n = 0;                  /* integers stored so far */
+    int *map = 0;               /* realloc-grown result; returned to the caller as-is */
+    char *str;                  /* NOTE(review): never freed here - confirm who owns fgetl()'s result */
+    FILE *file = fopen(filename, "r");
+    if(!file) file_error(filename);
+    while((str=fgetl(file))){
+        map = realloc(map, (++n)*sizeof(int));  /* one more slot for this line */
+        map[n-1] = atoi(str);
+    }
+    fclose(file);               /* the stream is only needed while parsing */
+    return map;
+}
+
+void sorta_shuffle(void *arr, size_t n, size_t size, size_t sections)
+{
+    /* Cut arr into `sections` contiguous slices and shuffle each one on its own. */
+    char *base = (char*)arr;
+    for(size_t s = 0; s < sections; ++s){
+        size_t lo = n*s/sections;           /* first element of slice s */
+        size_t hi = n*(s+1)/sections;       /* one past its last element */
+        shuffle(base + lo*size, hi - lo, size);
+    }
+}
+
+void shuffle(void *arr, size_t n, size_t size)  /* random in-place permutation of n size-byte items */
+{
+    void *swp = calloc(1, size);                /* scratch space for one element */
+    for(size_t i = 0; swp && i+1 < n; ++i){     /* i+1 < n: no size_t wrap-around when n == 0 */
+        size_t j = i + rand()/(RAND_MAX / (n-i)+1);   /* partner index in [i, n-1] */
+        memcpy(swp,                  (char*)arr+(j*size), size);
+        memmove((char*)arr+(j*size), (char*)arr+(i*size), size);  /* j may equal i */
+        memcpy((char*)arr+(i*size),  swp,                 size);
+    }
+    free(swp);                                  /* free(NULL) is a no-op */
+}
+
+void del_arg(int argc, char **argv, int index)  /* removes argv[index], closing the gap */
+{
+    int pos = index;
+    while(pos < argc-1){ argv[pos] = argv[pos+1]; ++pos; }  /* shift the tail down one slot */
+    argv[pos] = 0;                                           /* slot left over becomes NULL */
+}
+
+/* Returns 1 after deleting the first argv entry equal to arg, or 0 when none matches. */
+int find_arg(int argc, char* argv[], char *arg)
+{
+    for(int pos = 0; pos < argc; ++pos){
+        char *cur = argv[pos];
+        if(cur && strcmp(cur, arg) == 0){   /* NULL slots are skipped */
+            del_arg(argc, argv, pos);
+            return 1;
+        }
+    }
+    return 0;
+}
+
+/* Returns the integer that follows flag `arg` and deletes both entries; `def` if not found. */
+int find_int_arg(int argc, char **argv, char *arg, int def)
+{
+    for(int i = 0; i < argc-1; ++i){
+        if(!argv[i] || !argv[i+1]) continue;    /* del_arg() leaves NULL slots; a flag needs a value */
+        if(0==strcmp(argv[i], arg)){
+            def = atoi(argv[i+1]);
+            del_arg(argc, argv, i);             /* drops the flag ... */
+            del_arg(argc, argv, i);             /* ... then the value that moved into slot i */
+            break;
+        }
+    }
+    return def;
+}
+
+/* Returns the float that follows flag `arg` and deletes both entries; `def` if not found. */
+float find_float_arg(int argc, char **argv, char *arg, float def)
+{
+    for(int i = 0; i < argc-1; ++i){
+        if(!argv[i] || !argv[i+1]) continue;    /* del_arg() leaves NULL slots; a flag needs a value */
+        if(0==strcmp(argv[i], arg)){
+            def = atof(argv[i+1]);
+            del_arg(argc, argv, i);             /* drops the flag ... */
+            del_arg(argc, argv, i);             /* ... then the value that moved into slot i */
+            break;
+        }
+    }
+    return def;
+}
+
+/* Returns the argv string that follows flag `arg` (not copied) and deletes both; else `def`. */
+char *find_char_arg(int argc, char **argv, char *arg, char *def)
+{
+    for(int i = 0; i < argc-1; ++i){
+        if(!argv[i] || !argv[i+1]) continue;    /* del_arg() leaves NULL slots; a flag needs a value */
+        if(0==strcmp(argv[i], arg)){
+            def = argv[i+1];
+            del_arg(argc, argv, i);             /* drops the flag ... */
+            del_arg(argc, argv, i);             /* ... then the value that moved into slot i */
+            break;
+        }
+    }
+    return def;
+}
+
+
+/*
+ * Strips the directory part (everything through the last '/') and then cuts
+ * the name at its first '.'. The text after the last '/' is passed through
+ * copy_string(), and the '.' cut is applied to the string that call returns.
+ */
+char *basecfg(char *cfgfile)
+{
+    char *slash = strrchr(cfgfile, '/');
+    char *name = copy_string(slash ? slash+1 : cfgfile);
+    char *dot = strchr(name, '.');
+    if (dot) *dot = 0;
+    return name;
+}
 
 int alphanum_to_int(char c)
 {
@@ -25,28 +145,27 @@
     for(i =0 ; i < M; ++i){
         printf("%d ", i+1);
         for(j = 0; j < N; ++j){
-            printf("%10.6f, ", A[i*N+j]);
+            printf("%2.4f, ", A[i*N+j]);
         }
         printf("\n");
     }
     printf("\n");
 }
 
-
-char *find_replace(char *str, char *orig, char *rep)
+void find_replace(char *str, char *orig, char *rep, char *output)  /* output = str with its first orig replaced by rep */
 {
-    static char buffer[4096];
+    char buffer[4096] = {0};     /* private copy of str, so output may alias str */
     char *p;
 
-    if(!(p = strstr(str, orig)))  // Is 'orig' even in 'str'?
-        return str;
+    snprintf(buffer, sizeof buffer, "%s", str);  /* bounded: an over-long str is truncated, not overflowed */
+    if(!(p = strstr(buffer, orig))){  // Is 'orig' even in 'str'?
+        if(output != str) sprintf(output, "%s", str);  /* formatting a string onto itself is undefined */
+        return;
+    }
 
-    strncpy(buffer, str, p-str); // Copy characters from 'str' start to 'orig' st$
-    buffer[p-str] = '\0';
+    *p = '\0';                   /* cut the copy where orig starts, leaving only the prefix */
 
-    sprintf(buffer+(p-str), "%s%s", rep, p+strlen(orig));
-
-    return buffer;
+    sprintf(output, "%s%s%s", buffer, rep, p+strlen(orig));
 }
 
 float sec(clock_t clocks)
@@ -57,11 +176,11 @@
 void top_k(float *a, int n, int k, int *index)
 {
     int i,j;
-    for(j = 0; j < k; ++j) index[j] = 0;
+    for(j = 0; j < k; ++j) index[j] = -1;
     for(i = 0; i < n; ++i){
         int curr = i;
         for(j = 0; j < k; ++j){
-            if(a[curr] > a[index[j]]){
+            if((index[j] < 0) || a[curr] > a[index[j]]){
                 int swap = curr;
                 curr = index[j];
                 index[j] = swap;
@@ -73,7 +192,8 @@
 void error(const char *s)
 {
     perror(s);
-    exit(0);
+    assert(0);      /* when NDEBUG is not defined this aborts here, before exit() is reached */
+    exit(-1);       /* otherwise terminate with a non-zero status */
 }
 
 void malloc_error()
@@ -110,7 +230,7 @@
     size_t offset = 0;
     for(i = 0; i < len; ++i){
         char c = s[i];
-        if(c==' '||c=='\t'||c=='\n') ++offset;
+        if(c==' '||c=='\t'||c=='\n'||c =='\r') ++offset;
         else s[i-offset] = c;
     }
     s[len-offset] = '\0';
@@ -129,6 +249,13 @@
     s[len-offset] = '\0';
 }
 
+void free_ptrs(void **ptrs, int n)
+{
+    /* Release every entry (the order does not matter to free), then the pointer array itself. */
+    while(n-- > 0) free(ptrs[n]);
+    free(ptrs);
+}
+
 char *fgetl(FILE *fp)
 {
     if(feof(fp)) return 0;
@@ -160,6 +287,42 @@
     return line;
 }
 
+int read_int(int fd)        /* reads the raw bytes of one int from fd; -1 signals failure */
+{
+    int n = 0;
+    int next = read(fd, &n, sizeof(int));
+    if(next != (int)sizeof(int)) return -1;    /* EOF, error, or a short read that left n incomplete */
+    return n;
+}
+
+void write_int(int fd, int n)       /* writes the raw bytes of n to fd; calls error() on failure */
+{
+    int next = write(fd, &n, sizeof(int));
+    if(next != (int)sizeof(int)) error("write failed");  /* also catches a short write */
+}
+
+/* Reads exactly `bytes` bytes from fd into buffer; returns 0 when done, 1 if a read() yields <= 0. */
+int read_all_fail(int fd, char *buffer, size_t bytes)
+{
+    for(size_t done = 0; done < bytes; ){
+        int got = read(fd, buffer + done, bytes-done);
+        if(got <= 0) return 1;
+        done += got;
+    }
+    return 0;
+}
+
+/* Writes all `bytes` bytes of buffer to fd; returns 0 when done, 1 if a write() yields <= 0. */
+int write_all_fail(int fd, char *buffer, size_t bytes)
+{
+    for(size_t n = 0; n < bytes; ){
+        int next = write(fd, buffer + n, bytes-n);  /* signed on purpose: a size_t hid write()'s -1 */
+        if(next <= 0) return 1;
+        n += next;
+    }
+    return 0;
+}
+
 void read_all(int fd, char *buffer, size_t bytes)
 {
     size_t n = 0;
@@ -250,6 +413,28 @@
     return sum_array(a,n)/n;
 }
 
+/*
+ * Stores in avg[0..els-1] the element-wise mean of the n arrays a[0..n-1],
+ * reading els floats from each. avg is zeroed first, then used as accumulator.
+ */
+void mean_arrays(float **a, int n, int els, float *avg)
+{
+    memset(avg, 0, els*sizeof(float));
+    for(int row = 0; row < n; ++row){
+        const float *src = a[row];
+        for(int col = 0; col < els; ++col) avg[col] += src[col];
+    }
+    /* Turn the per-column sums into means. */
+    for(int col = 0; col < els; ++col) avg[col] /= n;
+}
+
+void print_statistics(float *a, int n)  /* prints MSE, mean and variance of a[0..n-1] on one line */
+{
+    const float mean = mean_array(a, n);
+    const float variance = variance_array(a, n);
+    printf("MSE: %.6f, Mean: %.6f, Variance: %.6f\n", mse_array(a, n), mean, variance);
+}
+
 float variance_array(float *a, int n)
 {
     int i;
@@ -260,13 +445,28 @@
     return variance;
 }
 
-float constrain(float a, float max)
+int constrain_int(int a, int min, int max)  /* limits a to the range [min, max] */
 {
-    if(a > abs(max)) return abs(max);
-    if(a < -abs(max)) return -abs(max);
+    if (a < min) return min;    /* below the range: snap to the lower bound */
+    if (a > max) return max;    /* above the range: snap to the upper bound */
     return a;
 }
 
+/* Limits a to [min, max]. Unlike constrain_int(), the value to limit is the LAST argument. */
+float constrain(float min, float max, float a)
+{
+    if (a < min) return min;
+    return (a > max) ? max : a;
+}
+
+/* Euclidean distance between a and b, sampling only indices 0, sub, 2*sub, ... below n. */
+float dist_array(float *a, float *b, int n, int sub)
+{
+    float total = 0;
+    for(int k = 0; k < n; k += sub) total += pow(a[k]-b[k], 2);
+    return sqrt(total);
+}
+
 float mse_array(float *a, int n)
 {
     int i;
@@ -313,6 +513,19 @@
     }
 }
 
+/* Rescales a[0..n-1] in place by 1/sum (via scale_array), then subtracts the entries one by one
+ * from a rand_uniform(0, 1) draw and returns the first index at which the remainder is <= 0. */
+int sample_array(float *a, int n)
+{
+    scale_array(a, n, 1./sum_array(a, n));
+    float r = rand_uniform(0, 1);
+    for(int i = 0; i < n; ++i){
+        r -= a[i];
+        if (r <= 0) return i;
+    }
+    return n-1;     /* remainder never reached zero: fall back to the last index */
+}
+
 int max_index(float *a, int n)
 {
     if(n <= 0) return -1;
@@ -327,8 +540,18 @@
     return max_i;
 }
 
+/*
+ * Returns an integer in [min, max], both ends included, computed as
+ * rand() % span + low. The bounds may be given in either order.
+ */
+int rand_int(int min, int max)
+{
+    int lo = (max < min) ? max : min;
+    int hi = (max < min) ? min : max;
+    return (rand()%(hi - lo + 1)) + lo;
+}
+
 // From http://en.wikipedia.org/wiki/Box%E2%80%93Muller_transform
-#define TWO_PI 6.2831853071795864769252866
 float rand_normal()
 {
     static int haveSpare = 0;
@@ -361,9 +584,33 @@
    }
  */
 
-float rand_uniform()
+size_t rand_size_t()
 {
-    return (float)rand()/RAND_MAX;
+    /*
+     * Assemble the result one random byte at a time (low 8 bits of rand()).
+     * Looping over sizeof(size_t) bytes keeps every shift narrower than the
+     * type, so this is also well-defined where size_t is 32 bits wide.
+     */
+    size_t r = 0;
+    for(size_t i = 0; i < sizeof r; ++i) r = (r << 8) | (size_t)(rand()&0xff);
+    return r;
+}
+
+/*
+ * Returns (float)rand()/RAND_MAX stretched over the range between the two
+ * bounds and offset by the lower one; the bounds may come in either order.
+ */
+float rand_uniform(float min, float max)
+{
+    float lo = (max < min) ? max : min;
+    float hi = (max < min) ? min : max;
+    return ((float)rand()/RAND_MAX * (hi - lo)) + lo;
+}
+
+/* Draws a factor with rand_uniform(1, s), then returns it or its reciprocal per rand()%2. */
+float rand_scale(float s)
+{
+    float factor = rand_uniform(1, s);
+    return (rand()%2) ? factor : 1./factor;
 }
 
 float **one_hot_encode(float *a, int n, int k)

--
Gitblit v1.10.0