From e0976bcb30fa50e6e33c701fc057a4e93935bccf Mon Sep 17 00:00:00 2001 From: Edmond Yoo <hj3yoo@uwaterloo.ca> Date: Sat, 13 Oct 2018 06:17:09 +0000 Subject: [PATCH] Update README --- src/blas.h | 70 ++++++++++++++++++++++++++++++++++ 1 files changed, 69 insertions(+), 1 deletions(-) diff --git a/src/blas.h b/src/blas.h index 1657fc5..c40422a 100644 --- a/src/blas.h +++ b/src/blas.h @@ -1,23 +1,91 @@ #ifndef BLAS_H #define BLAS_H +void flatten(float *x, int size, int layers, int batch, int forward); void pm(int M, int N, float *A); float *random_matrix(int rows, int cols); void time_random_matrix(int TA, int TB, int m, int k, int n); +void reorg_cpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); void test_blas(); +void const_cpu(int N, float ALPHA, float *X, int INCX); +void constrain_ongpu(int N, float ALPHA, float * X, int INCX); +void pow_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_cpu(int N, float *X, int INCX, float *Y, int INCY); + void axpy_cpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); void copy_cpu(int N, float *X, int INCX, float *Y, int INCY); void scal_cpu(int N, float ALPHA, float *X, int INCX); +void fill_cpu(int N, float ALPHA, float * X, int INCX); float dot_cpu(int N, float *X, int INCX, float *Y, int INCY); void test_gpu_blas(); +void shortcut_cpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); + +void mean_cpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_cpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void normalize_cpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); + +void scale_bias(float *output, float *scales, int batch, int n, int size); +void backward_scale_cpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void mean_delta_cpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void variance_delta_cpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); +void normalize_delta_cpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); + +void smooth_l1_cpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_cpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_sum_cpu(float *a, float *b, float *s, int num, float *c); + +void softmax(float *input, int n, float temp, float *output, int stride); +void upsample_cpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); #ifdef GPU +#include "cuda.h" + void axpy_ongpu(int N, float ALPHA, float * X, int INCX, float * Y, int INCY); void axpy_ongpu_offset(int N, float ALPHA, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); void copy_ongpu(int N, float * X, int INCX, float * Y, int INCY); void copy_ongpu_offset(int N, float * X, int OFFX, int INCX, float * Y, int OFFY, int INCY); void scal_ongpu(int N, float ALPHA, float * X, int INCX); -void mask_ongpu(int N, float * X, float * mask); +void supp_ongpu(int N, float ALPHA, float * X, int INCX); +void mask_ongpu(int N, float * X, float mask_num, float * mask); +void const_ongpu(int N, float ALPHA, float *X, int INCX); +void pow_ongpu(int N, float ALPHA, float *X, int INCX, float *Y, int INCY); +void mul_ongpu(int N, float *X, int INCX, float *Y, int INCY); +void fill_ongpu(int N, float ALPHA, float * X, int INCX); + +void mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void normalize_gpu(float *x, float *mean, float *variance, int batch, int filters, int spatial); + +void normalize_delta_gpu(float *x, float *mean, float *variance, float *mean_delta, float *variance_delta, int batch, int filters, int spatial, float *delta); + +void fast_mean_delta_gpu(float *delta, float *variance, int batch, int filters, int spatial, float *mean_delta); +void fast_variance_delta_gpu(float *x, float *delta, float *mean, float *variance, int batch, int filters, int spatial, float *variance_delta); + +void fast_variance_gpu(float *x, float *mean, int batch, int filters, int spatial, float *variance); +void fast_mean_gpu(float *x, int batch, int filters, int spatial, float *mean); +void shortcut_gpu(int batch, int w1, int h1, int c1, float *add, int w2, int h2, int c2, float *out); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_scale_gpu(float *x_norm, float *delta, int batch, int n, int size, float *scale_updates); +void scale_bias_gpu(float *output, float *biases, int batch, int n, int size); +void add_bias_gpu(float *output, float *biases, int batch, int n, int size); +void backward_bias_gpu(float *bias_updates, float *delta, int batch, int n, int size); + +void smooth_l1_gpu(int n, float *pred, float *truth, float *delta, float *error); +void l2_gpu(int n, float *pred, float *truth, float *delta, float *error); +void weighted_delta_gpu(float *a, float *b, float *s, float *da, float *db, float *ds, int num, float *dc); +void weighted_sum_gpu(float *a, float *b, float *s, int num, float *c); +void mult_add_into_gpu(int num, float *a, float *b, float *c); + +void reorg_ongpu(float *x, int w, int h, int c, int batch, int stride, int forward, float *out); + +void softmax_gpu(float *input, int n, int offset, int groups, float temp, float *output); +void adam_gpu(int n, float *x, float *m, float *v, float B1, float B2, float rate, float eps, int t); +void adam_update_gpu(float *w, float *d, float *m, float *v, float B1, float B2, float eps, float decay, float rate, int n, int batch, int t); + +void flatten_ongpu(float *x, int spatial, int layers, int batch, int forward, float *out); + +void upsample_gpu(float *in, int w, int h, int c, int batch, int stride, int forward, float scale, float *out); + #endif #endif -- Gitblit v1.10.0