From c7b10ceadb1a78e7480d281444a31ae2a7dc1b05 Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Fri, 06 May 2016 23:25:16 +0000
Subject: [PATCH] so much need to commit
---
src/data.c | 143 +++++++++++++++++++++++++++++++++++++++++++----
1 files changed, 130 insertions(+), 13 deletions(-)
diff --git a/src/data.c b/src/data.c
index c429a73..b0368ee 100644
--- a/src/data.c
+++ b/src/data.c
@@ -22,6 +22,19 @@
return lines;
}
+/* Draw n entries of paths[0..m-1] at random, with replacement; indexes[i] gets the slot used for entry i. Returns a calloc'd array of borrowed pointers. */
+char **get_random_paths_indexes(char **paths, int n, int m, int *indexes)
+{
+    int k;
+    char **picked = calloc(n, sizeof(char*));
+    for(k = 0; k < n; ++k){
+        indexes[k] = rand_r(&data_seed)%m; /* m must be non-zero here */
+        picked[k] = paths[indexes[k]];
+        if(!k) printf("%s\n", picked[k]); /* only the first pick is echoed */
+    }
+    return picked;
+}
+
char **get_random_paths(char **paths, int n, int m)
{
char **random_paths = calloc(n, sizeof(char*));
@@ -95,6 +108,11 @@
image crop = random_crop_image(im, min, max, size);
int flip = rand_r(&data_seed)%2;
if (flip) flip_image(crop);
+ /*
+ show_image(im, "orig");
+ show_image(crop, "crop");
+ cvWaitKey(0);
+ */
free_image(im);
X.vals[i] = crop.data;
X.cols = crop.h*crop.w*crop.c;
@@ -359,7 +377,7 @@
data load_data_captcha(char **paths, int n, int m, int k, int w, int h)
{
if(m) paths = get_random_paths(paths, n, m);
- data d;
+ data d = {0};
d.shallow = 0;
d.X = load_image_paths(paths, n, w, h);
d.y = make_matrix(n, k*NUMCHARS);
@@ -374,7 +392,7 @@
data load_data_captcha_encode(char **paths, int n, int m, int w, int h)
{
if(m) paths = get_random_paths(paths, n, m);
- data d;
+ data d = {0};
d.shallow = 0;
d.X = load_image_paths(paths, n, w, h);
d.X.cols = 17100;
@@ -444,6 +462,9 @@
void free_data(data d)
{
+ if(d.indexes){
+ free(d.indexes);
+ }
if(!d.shallow){
free_matrix(d.X);
free_matrix(d.y);
@@ -457,7 +478,7 @@
{
char **random_paths = get_random_paths(paths, n, m);
int i;
- data d;
+ data d = {0};
d.shallow = 0;
d.X.rows = n;
@@ -509,7 +530,7 @@
{
if(m) paths = get_random_paths(paths, 2*n, m);
int i,j;
- data d;
+ data d = {0};
d.shallow = 0;
d.X.rows = n;
@@ -576,7 +597,7 @@
int h = orig.h;
int w = orig.w;
- data d;
+ data d = {0};
d.shallow = 0;
d.w = w;
d.h = h;
@@ -624,7 +645,7 @@
{
char **random_paths = get_random_paths(paths, n, m);
int i;
- data d;
+ data d = {0};
d.shallow = 0;
d.X.rows = n;
@@ -693,6 +714,8 @@
*a.d = load_data(a.paths, a.n, a.m, a.labels, a.classes, a.w, a.h);
} else if (a.type == CLASSIFICATION_DATA){
*a.d = load_data_augment(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size);
+ } else if (a.type == STUDY_DATA){
+ *a.d = load_data_study(a.paths, a.n, a.m, a.labels, a.classes, a.min, a.max, a.size);
} else if (a.type == DETECTION_DATA){
*a.d = load_data_detection(a.n, a.paths, a.m, a.classes, a.w, a.h, a.num_boxes, a.background);
} else if (a.type == WRITING_DATA){
@@ -727,7 +750,7 @@
{
if(m) paths = get_random_paths(paths, n, m);
char **replace_paths = find_replace_paths(paths, n, ".png", "-label.png");
- data d;
+ data d = {0};
d.shallow = 0;
d.X = load_image_paths(paths, n, w, h);
d.y = load_image_paths_gray(replace_paths, n, out_w, out_h);
@@ -741,7 +764,7 @@
data load_data(char **paths, int n, int m, char **labels, int k, int w, int h)
{
if(m) paths = get_random_paths(paths, n, m);
- data d;
+ data d = {0};
d.shallow = 0;
d.X = load_image_paths(paths, n, w, h);
d.y = load_labels_paths(paths, n, labels, k);
@@ -749,10 +772,22 @@
return d;
}
+/* Load n cropped images and their labels; when m != 0 the paths are sampled first and each pick is recorded in the result's indexes array. */
+data load_data_study(char **paths, int n, int m, char **labels, int k, int min, int max, int size)
+{
+    data study = { .indexes = calloc(n, sizeof(int)) }; /* every other field starts zeroed */
+    char **used = m ? get_random_paths_indexes(paths, n, m, study.indexes) : paths;
+    study.shallow = 0;
+    study.X = load_image_cropped_paths(used, n, min, max, size);
+    study.y = load_labels_paths(used, n, labels, k);
+    if(m) free(used); /* frees only the sampled pointer array; the strings still belong to paths */
+    return study;
+}
+
data load_data_augment(char **paths, int n, int m, char **labels, int k, int min, int max, int size)
{
if(m) paths = get_random_paths(paths, n, m);
- data d;
+ data d = {0};
d.shallow = 0;
d.X = load_image_cropped_paths(paths, n, min, max, size);
d.y = load_labels_paths(paths, n, labels, k);
@@ -791,7 +826,7 @@
data concat_data(data d1, data d2)
{
- data d;
+ data d = {0};
d.shallow = 1;
d.X = concat_matrix(d1.X, d2.X);
d.y = concat_matrix(d1.y, d2.y);
@@ -800,7 +835,7 @@
data load_categorical_data_csv(char *filename, int target, int k)
{
- data d;
+ data d = {0};
d.shallow = 0;
matrix X = csv_to_matrix(filename);
float *truth_1d = pop_column(&X, target);
@@ -817,7 +852,7 @@
data load_cifar10_data(char *filename)
{
- data d;
+ data d = {0};
d.shallow = 0;
long i,j;
matrix X = make_matrix(10000, 3072);
@@ -863,10 +898,21 @@
}
}
+/* Soften the targets in place: each d.y entry becomes eps/d.y.cols + (1-eps)*entry, with eps fixed at .1. Writes through d.y.vals, so the caller's rows change. */
+void smooth_data(data d)
+{
+    float eps = .1;
+    float scale = 1. / d.y.cols;
+    int r, c;
+    for(r = 0; r < d.y.rows; ++r){
+        float *row = d.y.vals[r];
+        for(c = 0; c < d.y.cols; ++c) row[c] = eps * scale + (1-eps) * row[c];
+    }
+}
data load_all_cifar10()
{
- data d;
+ data d = {0};
d.shallow = 0;
int i,j,b;
matrix X = make_matrix(50000, 3072);
@@ -894,9 +940,57 @@
//normalize_data_rows(d);
//translate_data_rows(d, -128);
scale_data_rows(d, 1./255);
+ smooth_data(d);
return d;
}
+/* Load Go positions from filename. Each record is two lines: a "row col" move, then a board string ('1' -> 1, '2' -> -1, anything else -> 0). Malformed records are skipped. */
+data load_go(char *filename)
+{
+    FILE *fp = fopen(filename, "rb");
+    if(!fp) file_error(filename); /* checked before the large allocations; presumably does not return - confirm */
+    matrix X = make_matrix(3363059, 361); /* starting capacity; doubled below whenever it fills up */
+    matrix y = make_matrix(3363059, 361);
+    char *label;
+    int count = 0;
+    while((label = fgetl(fp))){
+        int i, row = -1, col = -1, ended = 0;
+        char *board = fgetl(fp);
+        /* Drop records with no board line or with an unparsable / off-board move. */
+        if(!board || sscanf(label, "%d %d", &row, &col) != 2
+                || row < 0 || row >= 19 || col < 0 || col >= 19){
+            free(label);
+            free(board);
+            continue;
+        }
+        if(count == X.rows){
+            X = resize_matrix(X, count*2);
+            y = resize_matrix(y, count*2);
+        }
+        y.vals[count][row*19 + col] = 1; /* mark the move's cell in the 19x19 target row */
+        for(i = 0; i < 19*19; ++i){
+            char c = ended ? 0 : board[i]; /* never read past the end of a short board line */
+            float val = 0;
+            if(!c) ended = 1;
+            else if(c == '1') val = 1;
+            else if(c == '2') val = -1;
+            X.vals[count][i] = val;
+        }
+        ++count;
+        free(label);
+        free(board);
+    }
+    X = resize_matrix(X, count); /* shrink both matrices to the records actually kept */
+    y = resize_matrix(y, count);
+    fclose(fp);
+    data d = {0};
+    d.shallow = 0;
+    d.X = X;
+    d.y = y;
+    return d;
+}
+
+
void randomize_data(data d)
{
int i;
@@ -936,6 +1030,29 @@
}
}
+/* Build a data set of num rows drawn at random, with replacement, from d; the X row and y row of each pick stay paired. */
+data get_random_data(data d, int num)
+{
+    int k;
+    data sample = {0};
+    /* Shallow result: only the two row-pointer arrays are new, every row aliases a row of d. */
+    sample.shallow = 1;
+    sample.X.rows = num;
+    sample.X.cols = d.X.cols;
+    sample.X.vals = calloc(num, sizeof(float *));
+
+    sample.y.rows = num;
+    sample.y.cols = d.y.cols;
+    sample.y.vals = calloc(num, sizeof(float *));
+
+    for(k = 0; k < num; ++k){
+        int pick = rand()%d.X.rows; /* d must have at least one row */
+        sample.X.vals[k] = d.X.vals[pick];
+        sample.y.vals[k] = d.y.vals[pick];
+    }
+    return sample;
+}
+
data *split_data(data d, int part, int total)
{
data *split = calloc(2, sizeof(data));
--
Gitblit v1.10.0