From 32d2c969973aa98635123743f321859192ff581d Mon Sep 17 00:00:00 2001
From: Joseph Redmon <pjreddie@gmail.com>
Date: Thu, 09 Jun 2016 22:09:30 +0000
Subject: [PATCH] hiding secret broken things
---
/dev/null | 50 --------------------------------------------------
Makefile | 6 +++---
2 files changed, 3 insertions(+), 53 deletions(-)
diff --git a/Makefile b/Makefile
index f3e4b79..28a0d17 100644
--- a/Makefile
+++ b/Makefile
@@ -1,6 +1,6 @@
-GPU=1
-CUDNN=1
-OPENCV=1
+GPU=0
+CUDNN=0
+OPENCV=0
DEBUG=0
ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
diff --git a/ai2.mk b/ai2.mk
deleted file mode 100644
index 57edc89..0000000
--- a/ai2.mk
+++ /dev/null
@@ -1,79 +0,0 @@
-GPU=0
-CUDNN=0
-OPENCV=0
-DEBUG=1
-AI2=1
-
-ARCH= --gpu-architecture=compute_52 --gpu-code=compute_52
-
-VPATH=./src/
-EXEC=darknet
-OBJDIR=./obj/
-
-CC=gcc -std=gnu11
-NVCC=nvcc
-OPTS=-Ofast
-LDFLAGS= -lm -pthread
-COMMON=
-CFLAGS=-Wall -Wfatal-errors
-
-ifeq ($(DEBUG), 1)
-OPTS=-O0 -g
-endif
-
-CFLAGS+=$(OPTS)
-
-ifeq ($(OPENCV), 1)
-COMMON+= -DOPENCV
-CFLAGS+= -DOPENCV
-LDFLAGS+= `pkg-config --libs opencv`
-COMMON+= `pkg-config --cflags opencv`
-endif
-
-ifeq ($(AI2), 1)
-COMMON+= -DAI2
-CFLAGS+= -DAI2
-endif
-
-ifeq ($(GPU), 1)
-COMMON+= -DGPU -I/usr/local/cuda/include/
-CFLAGS+= -DGPU
-LDFLAGS+= -L/usr/local/cuda/lib64 -lcuda -lcudart -lcublas -lcurand
-endif
-
-ifeq ($(CUDNN), 1)
-COMMON+= -DCUDNN
-CFLAGS+= -DCUDNN
-LDFLAGS+= -lcudnn
-endif
-
-OBJ=gemm.o utils.o cuda.o deconvolutional_layer.o convolutional_layer.o list.o image.o activations.o im2col.o col2im.o blas.o crop_layer.o dropout_layer.o maxpool_layer.o softmax_layer.o data.o matrix.o network.o connected_layer.o cost_layer.o parser.o option_list.o darknet.o detection_layer.o imagenet.o captcha.o route_layer.o writing.o box.o nightmare.o normalization_layer.o avgpool_layer.o coco.o dice.o yolo.o layer.o compare.o classifier.o local_layer.o swag.o shortcut_layer.o activation_layer.o rnn_layer.o gru_layer.o rnn.o rnn_vid.o crnn_layer.o coco_demo.o tag.o cifar.o yolo_demo.o go.o batchnorm_layer.o art.o xnor_layer.o common.o binary_convolution.o
-ifeq ($(GPU), 1)
-LDFLAGS+= -lstdc++
-OBJ+=convolutional_kernels.o deconvolutional_kernels.o activation_kernels.o im2col_kernels.o col2im_kernels.o blas_kernels.o crop_layer_kernels.o dropout_layer_kernels.o maxpool_layer_kernels.o softmax_layer_kernels.o network_kernels.o avgpool_layer_kernels.o
-endif
-
-OBJS = $(addprefix $(OBJDIR), $(OBJ))
-DEPS = $(wildcard src/*.h) Makefile
-
-all: obj results $(EXEC)
-
-$(EXEC): $(OBJS)
- $(CC) $(COMMON) $(CFLAGS) $^ -o $@ $(LDFLAGS)
-
-$(OBJDIR)%.o: %.c $(DEPS)
- $(CC) $(COMMON) $(CFLAGS) -c $< -o $@
-
-$(OBJDIR)%.o: %.cu $(DEPS)
- $(NVCC) $(ARCH) $(COMMON) --compiler-options "$(CFLAGS)" -c $< -o $@
-
-obj:
- mkdir -p obj
-results:
- mkdir -p results
-
-.PHONY: clean
-
-clean:
- rm -rf $(OBJS) $(EXEC)
-
diff --git a/src/binary_convolution.c b/src/binary_convolution.c
deleted file mode 100644
index dfededa..0000000
--- a/src/binary_convolution.c
+++ /dev/null
@@ -1,598 +0,0 @@
-#include "binary_convolution.h"
-
-int ai2_bin_dp(BINARY_WORD *a, BINARY_WORD *b, dim3 vdim) { // TODO unroll
- int accumulator = 0;
- for (int z = 0; z < vdim.z / BITS_PER_BINARY_WORD; z++) {
- for (int y = 0; y < vdim.y; y++) {
- for (int x = 0; x < vdim.x; x++) {
- int idx = z*vdim.y*vdim.x + y*vdim.x + x;
- accumulator += __builtin_popcount(~(a[idx] ^ b[idx])); // count the XNOR of the two bit vectors
- }
- }
- }
-
- return accumulator;
-}
-
-/**
- * Pre-conditions:
- * alpha_volume is an array of size x*y*z.
- * alpha_plane is an array of size x*y.
- * alpha_volume (x,y,z) is transposed to (z,x,y).
- */
-void ai2_calc_alpha(float *alpha_plane, float *alpha_volume, dim3 vdim) {
- for (int y = 0; y < vdim.y; ++y) {
- for (int x = 0; x < vdim.x; ++x) {
- int out = y * vdim.x + x;
- double accum = 0.0;
- for (int z = 0; z < vdim.z; ++z) {
- accum += alpha_volume[out * vdim.z + z];
- }
-
- alpha_plane[out] = accum / vdim.z;
- }
- }
-}
-
-/** @brief Wrapper function for generating the beta scaling factor */
-void ai2_calc_beta(float *beta_plane, float *beta_volume, dim3 vdim) {
- ai2_calc_alpha(beta_plane, beta_volume, vdim);
-}
-
-/** @brief Set the bit in a binary word */
-void ai2_bitset(BINARY_WORD *bword, unsigned int position) {
- BINARY_WORD mask = (1 << position);
- *bword = *bword | mask;
-}
-
-/** @brief Checks that the bit is set in a binary word */
-int ai2_is_set(BINARY_WORD bword, unsigned int position) {
- unsigned int position_complement = (BITS_PER_BINARY_WORD - 1) - position; // number of leading bits before the bit position of interest
- bword = (bword << position_complement); // zero out leading bits
- bword = (bword >> (BITS_PER_BINARY_WORD - 1)); // shift bit position of interest to the 0th position
- return (bword & 0x1); // test if bit position of interest is set
-}
-
-void ai2_flt_to_bin(BINARY_WORD *binary_vol, float *real_vol, dim3 dim) {
- ai2_transpose3D(real_vol, dim); // (x,y,z) -> (z,x,y)
-
- int sz = dim.x * dim.y * dim.z;
- for (int i = 0; i < sz; i += BITS_PER_BINARY_WORD) {
- BINARY_WORD tmp = 0x00000000;
- for (int x = 0; x < BITS_PER_BINARY_WORD; ++x) {
- int waddr = x + i;
- if (signbit(real_vol[waddr]) == 0)
- ai2_bitset(&tmp, (BITS_PER_BINARY_WORD - 1) - x);
- }
- binary_vol[i / BITS_PER_BINARY_WORD] = tmp;
- }
-}
-
-void ai2_bin_to_flt(float *real_vol, BINARY_WORD *binary_vol, dim3 dim) { // TODO unit tests
- for (int z = 0; z < dim.z; z++) {
- for (int y = 0; y < dim.y; y++) {
- for (int x = 0; x < dim.x / BITS_PER_BINARY_WORD; x++) { // TODO boundary checks, for uneven input
- BINARY_WORD word = binary_vol[z*dim.y*dim.x + y*dim.x + x];
- for (int t = 0; t < BITS_PER_BINARY_WORD; ++t) {
- int oidx = z*dim.y*dim.x + y*dim.x + x * BITS_PER_BINARY_WORD + t;
- if (ai2_is_set(word, t))
- real_vol[oidx] = 1.f;
- else
- real_vol[oidx] = -1.f;
- }
- }
- }
- }
-
- // Transpose channels back to output
- ai2_transpose3D(real_vol, dim); // (z,y,x) -> (x,y,z)
-}
-
-/* @brief: input is padded.
- */
-void ai2_bin_conv2D(float *output, const BINARY_WORD *input, const BINARY_WORD *weights, int ix, int iy, int wx, int wy, int pad, int stride) {
-
- int r, rd, c, cd;
- int wx_2 = wx / 2;
- int wy_2 = wy / 2;
-
- // Indexing for output pixels. x = [wx_2, ix + wx_2 - 1], y = [wy_2, iy + wy_2 - 1]
- int sx = pad; // start x
- int ex = ix + pad - 1; // end x
- int sy = pad; // start y
- int ey = iy + pad - 1; // end y
-
- // Indexing for weights
- int wsx, wex, wsy, wey;
- if (wx % 2 == 1) { // odd weights
- wsx = -wx_2; wex = wx_2 + 1;
- wsy = -wy_2; wey = wy_2 + 1;
- }
- else {
- wsx = -wx_2; wex = wx_2;
- wsy = -wy_2; wey = wy_2;
- }
-
- int px = ix + 2*pad;
- //int py = iy + 2*pad;
-
- for (r = sy; r <= ey; ++r) {
- for (c = sx; c <= ex; ++c) {
- int accumulator = 0;
- for (rd = wsy; rd < wey; ++rd) {
- for (cd = wsx; cd < wex; ++cd) {
- int iidx = (r+rd)*px + (c+cd);
- BINARY_WORD pixel = input[iidx];
- //BINARY_WORD pixel = 0xFFFFFFFF;
- //BINARY_WORD weight = 0xFFFFFFFF;
- int widx = (rd + wy_2)*wx + (cd+wx_2);
- BINARY_WORD weight = weights[widx];
- accumulator += __builtin_popcount(~(pixel ^ weight));
- }
- }
-
- // Padded space
- int oidx = r*px + c;
- output[oidx] += (float) accumulator;
- }
- }
-
- //for (r = sy; r <= ey; ++r) {
- // for (c = sx; c <= ex; ++c) {
- // int accumulator = 0;
- // for (rd = -wy_2; rd < wy_2; ++rd) {
- // for (cd = -wx_2; cd < wx_2; ++cd) {
- // int iidx = (r+rd)*px + (c+cd);
- // BINARY_WORD pixel = input[iidx];
- // //BINARY_WORD pixel = 0xFFFFFFFF;
- // //BINARY_WORD weight = 0xFFFFFFFF;
- // int widx = (rd + wy_2)*wx + (cd+wx_2);
- // BINARY_WORD weight = weights[widx];
- // accumulator += __builtin_popcount(~(pixel ^ weight));
- // }
- // }
-
- // // Padded space
- // int oidx = r*px + c;
- // output[oidx] += (float) accumulator;
- // }
- //}
-
- //ai2_bin_conv_within_boundary(output, input, weights, ix, iy, wx, wy, stride);
- //ai2_bin_conv_borders(output, input, weights, ix, iy, wx, wy, stride);
-}
-
-void ai2_pointwise_mul_mm(float *output, const float *input, int N) {
- int i = 0;
-
- while (i + 8 <= N) {
- output[i+0] *= input[i+0];
- output[i+1] *= input[i+1];
- output[i+2] *= input[i+2];
- output[i+3] *= input[i+3];
- output[i+4] *= input[i+4];
- output[i+5] *= input[i+5];
- output[i+6] *= input[i+6];
- output[i+7] *= input[i+7];
-
- i += 8;
- }
-
- while (++i < N) // Finish iteration that's leftover (e.g., last batch not divisible by 8 exactly)
- output[i] *= input[i];
-}
-
-/** @brief Performs a tiled pointwise matrix multiplication between two 2D tensors
- * Pre-conditions: wx < ix, and wy < iy
- */
-void ai2_pointwise_mul_mm_2d(float *output, const float *alpha, int ix, int iy, int wx, int wy, int pad) {
- // Slower version
-// for (int y = 0; y < iy; ++y)
-// for (int x = 0; x < ix; x++)
-// output[y*ix+x] *= input[(y % wy)*wx + (x % wx)];
-
- // Stride prefetch optimized
- for (int s = 0; s < wy; ++s) { // for each strip
- const float *strip_ptr = &alpha[s*wx];
- for (int y = pad; y < pad + (iy / wy); ++y) { //
- int stride = y*((ix+2*pad)*wy) + s*(ix+2*pad);
- float *output_ptr = &output[stride];
-
- for (int x = 0; x < ix; ++x) {
- output_ptr[x] *= strip_ptr[x % wx];
- }
- }
- }
-}
-
-void ai2_setFltInput(ai2_bin_conv_layer *layer, float *new_input) {
- if (new_input != NULL) {
- if (layer->input != NULL)
- free(layer->input);
- layer->input = new_input;
-
- dim3 dim;
- dim.x = layer->px;
- dim.y = layer->py;
- dim.z = layer->c;
-
- // Binarize input
- ai2_flt_to_bin(layer->binary_input, layer->input, dim);
-
- float *new_beta = (float *) calloc (dim.x * dim.y, sizeof(float));
- ai2_setFltBeta(layer, new_beta);
-
- // layer->input is transposed to (z,x,y) already
- ai2_calc_beta(layer->beta, layer->input, dim);
- }
-}
-
-void ai2_setBinInput(ai2_bin_conv_layer *layer, BINARY_WORD *new_input) {
- if (new_input != NULL) {
- if (layer->binary_input != NULL)
- free(layer->binary_input);
- layer->binary_input = new_input;
- }
-}
-
-void ai2_setFltWeights(ai2_bin_conv_layer *layer, float *new_weights) {
- if (new_weights != NULL) {
- if (layer->weights != NULL)
- free(layer->weights);
- layer->weights = new_weights;
-
- dim3 dim;
- dim.x = layer->wx;
- dim.y = layer->wy;
- dim.z = layer->c;
-
- ai2_flt_to_bin(layer->binary_weights, layer->weights, dim);
-
- // Calculate alpha
- if (layer->alpha != NULL)
- free(layer->alpha);
-
- layer->alpha = (float *) calloc (dim.x * dim.y, sizeof(float));
- // layer->weights is already transposed to (z,x,y) from ai2_flt_to_bin()
- ai2_calc_alpha(layer->alpha, layer->weights, dim);
- }
-}
-
-void ai2_setBinWeights(ai2_bin_conv_layer *layer, BINARY_WORD *new_weights) {
- if (new_weights != NULL) {
- if (layer->binary_weights != NULL)
- free(layer->binary_weights);
- layer->binary_weights = new_weights;
- }
-}
-
-void ai2_setFltOutput(ai2_bin_conv_layer *layer, float *new_output) {
- if (new_output != NULL) {
- if (layer->output != NULL)
- free(layer->output);
- layer->output = new_output;
- }
-}
-
-void ai2_setBinOutput(ai2_bin_conv_layer *layer, BINARY_WORD *new_output) {
- if (new_output != NULL) {
- if (layer->binary_output != NULL)
- free(layer->binary_output);
- layer->binary_output = new_output;
- }
-}
-
-void ai2_setFltAlpha(ai2_bin_conv_layer *layer, float *new_alpha) {
- if (new_alpha != NULL) {
- if (layer->alpha != NULL)
- free(layer->alpha);
- layer->alpha = new_alpha;
- }
-}
-
-void ai2_setFltBeta(ai2_bin_conv_layer *layer, float *new_beta) {
- if (new_beta != NULL) {
- if (layer->beta != NULL)
- free(layer->beta);
- layer->beta = new_beta;
- }
-}
-
-void ai2_setFltNewBeta(ai2_bin_conv_layer *layer, float *new_new_beta) {
- if (new_new_beta != NULL) {
- if (layer->new_beta != NULL)
- free(layer->new_beta);
- layer->new_beta = new_new_beta;
- }
-}
-
-float* ai2_getFltOutput(ai2_bin_conv_layer *layer) {
- //if (layer->output != NULL && layer->binary_output != NULL) {
- if (layer->output != NULL) {
-
- // The idea here was that all intermediate states are stored in the binary output.
- // Whenever the user needs the real-valued output, the conversion happens at this function call.
- //dim3 dim;
- //dim.x = layer->px;
- //dim.y = layer->py;
- //dim.z = layer->batch;
- //ai2_bin_to_flt(layer->output, layer->binary_output, dim);
-
- return layer->output;
- }
- else
- return NULL;
-}
-
-void ai2_transpose3D(float *data, dim3 d) {
- // Slow transpose for correctness
-
- // (x,y,z) becomes (z,x,y). Requires two transposes:
- // (x,y,z) -> (x,z,y).
- // (x,z,y) -> (z,x,y).
-
- // Intermediate buffer
- float *new_data = (float *) calloc (d.x * d.y * d.z, sizeof(float));
-
- // Transpose y and z axis.
- // (x,y,z) -> (x,z,y);
- for (int y = 0; y < d.y; ++y) {
- for (int z = 0; z < d.z; ++z) {
- for (int x = 0; x < d.x; ++x) {
- new_data[y*d.x*d.z + z*d.x + x] = data[z*d.x*d.y + y*d.x + x];
- //new_data[z*d.y*d.x + y*d.x + x] = data[y*d.x*d.z + z*d.x + x];
- }
- }
- }
-
- // Transpose x and z axis.
- // (x,z,y) -> (z,x,y)
- for (int y = 0; y < d.y; ++y) {
- for (int x = 0; x < d.x; ++x) {
- for (int z = 0; z < d.z; ++z) {
- data[y*d.z*d.x + x*d.z + z] = new_data[y*d.x*d.z + x + z*d.x];
- }
- }
- }
-
- free(new_data);
-}
-
-int ai2_isFloatWhole(float f) { // TODO unit test
- return (ceilf(f) == f) ? 1 : 0;
-}
-
-/* @brief Initialize and create all memory arrays for this layer
- * b - batches (number of filter batches)
- * c - input channels
- * ix - input width
- * iy - input height
- * wx - weight/filter width
- * wy - weight/filter height
- * s - stride between sliding windows
- * pad - the amount of padding
- */
-ai2_bin_conv_layer ai2_make_bin_conv_layer(int b, int c, int ix, int iy, int wx, int wy, int s, int pad) {
- // http://cs231n.github.io/convolutional-networks/
- // See: spatial arrangement section for determining what the output size will be
- float output_size = ((ix - wx + 2 * pad) / s) + 1;
- if (ai2_isFloatWhole(output_size) == 0) {
- fprintf(stderr, "ERROR! conv layer of (b,c,ix,iy,s,pad) = (%d, %d, %d, %d, %d, %d) will give "
- " invalid output dimension: %fx%f\n", b, c, ix, iy, s, pad, output_size, output_size);
- exit(1);
- }
-
- // TODO: Support strided output
- if (s != 1) {
- fprintf(stderr, "ERROR! Only stride values of 1 is supported\n");
- exit(1);
- }
-
- // padded input size
- int px = (int) ix + 2*pad;
- int py = (int) iy + 2*pad;
-
- ai2_bin_conv_layer l = {0}; // initialize all to 0
- l.input = (float *) calloc (c * px * py, sizeof(float)); // is padded
- l.binary_input = (BINARY_WORD *) calloc (c * px * py / BITS_PER_BINARY_WORD, sizeof(BINARY_WORD)); // is padded
-
- dim3 dim;
- dim.x = px;
- dim.y = py;
- dim.z = c;
- ai2_flt_to_bin(l.binary_input, l.input, dim);
-
- l.weights = (float *) calloc (b * c * wx * wy, sizeof(float));
- l.binary_weights = (BINARY_WORD *) calloc (b * c * wx * wy / BITS_PER_BINARY_WORD, sizeof(BINARY_WORD));
-
- l.output = (float *) calloc (c * px * py, sizeof(float)); // is padded
- l.new_beta = (float *) calloc(px * py, sizeof(float)); // is padded
-
- l.batch = b;
- l.c = c;
- l.h = iy;
- l.w = ix;
- l.stride = s;
- l.pad = pad;
- l.px = px;
- l.py = py;
- l.wx = wx;
- l.wy = wy;
-
- // The following parameters are uninitialized and should be set elsewhere:
- // l.beta - padded
- // l.alpha - not padded
-
- return l;
-}
-
-void ai2_free_bin_conv_layer(ai2_bin_conv_layer *layer) {
- if (layer->input) free (layer->input);
- if (layer->binary_input) free(layer->binary_input);
- if (layer->weights) free (layer->weights);
- if (layer->binary_weights) free(layer->binary_weights);
- if (layer->output) free(layer->output);
- if (layer->binary_output) free (layer->binary_output);
- if (layer->alpha) free(layer->alpha);
- if (layer->beta) free(layer->beta);
- if (layer->new_beta) free(layer->new_beta);
-}
-
-void ai2_throw_error(char *str) {
- fprintf(stderr, "ERROR: %s\n", str);
- exit(1);
-}
-
-void ai2_bin_forward(ai2_bin_conv_layer *l) {
- if (l->input == NULL) ai2_throw_error("Input was not allocated and set in this layer");
- if (l->weights == NULL) ai2_throw_error("Weights was not allocated and set in this layer");
- if (l->output == NULL) ai2_throw_error("Output was not allocated and set in this layer");
- if (l->alpha == NULL) ai2_throw_error("Alpha was not allocated and set in this layer");
- if (l->beta == NULL) ai2_throw_error("Beta was not allocated and set in this layer");
-
- if (l->c % 32 != 0) ai2_throw_error("Channel is not divisible by 32. Need to implement mask "
- "before supporting arbitrary channel size. For now, "
- "set the channel size to the nearest multiple of 32 "
- "and ignore any ''extra'' channels unused.");
-
- l->c /= BITS_PER_BINARY_WORD; // For compensating with doing more work per word
-
- float *output = l->output;
- float *alpha = l->alpha;
- float *beta = l->beta;
- int px = l->px;
- int py = l->py;
- BINARY_WORD *binary_weights = l->binary_weights;
-
- for (int z = 0; z < l->batch; ++z) { // for each filter map
- BINARY_WORD *binary_input = l->binary_input;
- for (int c = 0; c < l->c; ++c) { // for each input channel
- ai2_bin_conv2D(output, binary_input, binary_weights, l->w, l->h, l->wx, l->wy, l->pad, l->stride);
- binary_input += px*py; // increment with next 2D plane
- binary_weights += l->wx*l->wy; // increment with next 2D plane
-
- ai2_pointwise_mul_mm(output, beta, px*py);
- ai2_pointwise_mul_mm_2d(output, alpha, l->w, l->h, l->wx, l->wy, l->pad);
- }
- }
-}
-
-// Deprecated
-//double ai2_bin_conv_benchmark(ConvolutionArgs conv_args) {
-// printf("Running Binary Convolution test!\n");
-//
-// size_t ix, iy, iz, wx, wy, wz, L, stride;
-// ix = conv_args.input.x;
-// iy = conv_args.input.y;
-// iz = conv_args.input.z;
-// wx = conv_args.weights.x;
-// wy = conv_args.weights.y;
-// wz = conv_args.weights.z;
-// L = BITS_PER_BINARY_WORD;
-// stride = 1;
-//
-// printf("Input size (num elements, xyz): %zu %zu %zu\n", ix, iy, iz);
-// printf("Weights size (num elements. xyz): %zu %zu %zu\n", wx, wy, wz);
-//
-// double sz_input_elements = ix * iy * iz;
-// double sz_input_bytes = getSizeBytesBinaryArray(conv_args.input);
-// double sz_weight_bytes = getSizeBytesBinaryArray(conv_args.weights);
-//
-// printf("Input Size (MB): %f\n", sz_input_bytes / (1 << 20));
-// printf("Weight Size (MB): %f\n", sz_weight_bytes / (1 << 20));
-//
-// BINARY_WORD *binary_input = mallocBinaryVolume(conv_args.input);
-// BINARY_WORD *binary_weights = mallocBinaryVolume(conv_args.weights);
-// BINARY_WORD *b_input = binary_input; // alias
-// BINARY_WORD *b_weight = binary_weights; // alias
-// float *output = mallocFloatVolume(conv_args.output);
-// float *output_ptr = output;
-// float *beta = (float *) malloc(sizeof(float) * ix * iy); // we assume beta is given to us
-// float *alpha = (float *) malloc(sizeof(float) * wx * wy); // we assume alpha is given to us
-// float *new_output = mallocFloatVolume(conv_args.output);
-// //float *new_output_ptr = new_output;
-// float *new_beta = (float *) malloc(sizeof(float) * ix * iy);
-// //float *new_beta_ptr = new_beta;
-//
-// // Scale number of computations because we're packing.
-// // After this point, you should not have to reason about input dimensions for input and weights.
-// iz /= BITS_PER_BINARY_WORD;
-// wz /= BITS_PER_BINARY_WORD;
-//
-// // Calculate time taken by a request
-// struct timeval start_time;
-// gettimeofday(&start_time, NULL);
-//
-// // Preprocessing
-// int pad = wx/2;
-//
-// for (int z = 0; z < iz; ++z) { // number of channels
-// ai2_bin_conv2D(output_ptr, b_input, b_weight, ix, iy, wx, wy, pad, stride);
-// b_input += ix*iy; // increment with next 2D plane
-// b_weight += wx*wy; // increment with next 2D plane
-//
-// ai2_pointwise_mul_mm(output_ptr, beta, ix*iy);
-// ai2_pointwise_mul_mm_2d(output_ptr, alpha, ix, iy, wx, wy, pad);
-// }
-//
-// // copy to new array (need to wrap this around); TODO.
-// struct timeval end_time;
-// gettimeofday(&end_time, NULL);
-//
-// struct timeval diff_time;
-// timersub(&end_time, &start_time, &diff_time);
-// double time_conv_s = diff_time.tv_sec + diff_time.tv_usec * 1e-6;
-// double time_conv_ms = time_conv_s * 1000.0;
-//
-// double model_ops = (3*ix*iy*wx*wy*wz/L) + 2*ix*iy + ix*iy*iz;
-// double conv_ops_s = 1e-9 * model_ops / time_conv_s;
-// double conv_bandwidth_gb_s = 1e-9 * sz_input_bytes / (time_conv_ms / 1000.0);
-// double conv_bandwidth_gelement_s = 1e-9 * sz_input_elements / (time_conv_ms / 1000.0);
-//
-// printf("Execution Time (ms): %f\n", time_conv_ms);
-// printf("Binary Convolution OPS/s (GOPS/s): %f\n", conv_ops_s);
-// printf("Binary Convolution Bandwidth (GB/s): %f\n", conv_bandwidth_gb_s);
-// printf("Binary Convolution Bandwidth (GElements/s): %f\n\n", conv_bandwidth_gelement_s);
-//
-// free(binary_input);
-// free(binary_weights);
-// free(output);
-// free(beta);
-// free(alpha);
-// free(new_output);
-// free(new_beta);
-//
-// return time_conv_ms;
-//}
-
-// double ai2_bin_conv_benchmark(ConvolutionArgs conv_args);
-
-//void benchmark() {
-// int ix, iy, iz, wx, wy, wz;
-// iz = (1 << 9) * BITS_PER_BINARY_WORD;
-// ix = 227; // x == y for square face
-// iy = 227;
-// wx = 3; // x == y for a square face
-// wy = 3;
-// wz = iz;
-//
-// int runs = 1;
-// double accum_binary = 0;
-// double accum_real = 0;
-// ConvolutionArgs conv_args = initArgs(ix, iy, iz, wx, wy, wz);
-// for (int i = 0; i < runs; ++i) {
-// double t_binary_convolve = ai2_bin_conv_benchmark(conv_args);
-// double t_real_convolve = run_convolve2D_real(conv_args);
-// printf("t binary = %lf\n", t_binary_convolve);
-// printf("t real = %lf\n", t_real_convolve);
-// accum_binary += t_binary_convolve;
-// accum_real += t_real_convolve;
-// }
-//
-// accum_binary /= runs;
-// accum_real /= runs;
-// printf("Average convolution pass binary (ms): %lf\n", accum_binary);
-// printf("Average convolution pass flt (ms): %lf\n", accum_real);
-// printf("Speedup (Binary over Real): %lfx\n", accum_real / accum_binary);
-// exit(1);
-//}
diff --git a/src/binary_convolution.h b/src/binary_convolution.h
deleted file mode 100644
index 602677e..0000000
--- a/src/binary_convolution.h
+++ /dev/null
@@ -1,218 +0,0 @@
-#ifndef AI2_BINARY_CONVOLUTION_H
-#define AI2_BINARY_CONVOLUTION_H
-
-/** @file binary_convolution.h
- * @brief Routines related for approximating convolutions using binary operations
- *
- * @author Carlo C. del Mundo (carlom)
- * @date 05/23/2016
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <inttypes.h>
-#include <assert.h>
-#include <limits.h>
-#include <tgmath.h>
-#include <unistd.h>
-#include <stdint.h>
-#include <string.h>
-#include "common.h"
-
-typedef struct {
- int batch; // number of filter batches
- int c; // channels, z
- int h; // height, y
- int w; // width, x
- int stride;
- int pad;
-
- int px; // padded x (use this for striding in padded input and output arrays)
- int py; // padded y (use this for striding in padded input and output arrays)
- int wx;
- int wy;
-
- float *input; // input values
- BINARY_WORD *binary_input;
-
- float *weights; // weight or filter values
- BINARY_WORD *binary_weights;
-
- float *output; // output values
- BINARY_WORD *binary_output;
-
- float *alpha; // we assume alpha is calculated at the beginning of initialization
- float *beta; // we assume beta is given to us
- float *new_beta; // we calculate the new beta for the next layer
-
- struct ai2_bin_conv_layer *next;
-} ai2_bin_conv_layer;
-
-/** @brief Performs a binary convolution using XNOR and POPCOUNT between input and weights
- *
- * @param output A 2D real-valued plane to store the outputs
- * @param input A 2D binary-valued plane that holds the inputs
- * @param weights A 2D binary-valued plane that holds the weights
- * @param ix the input's x dimension
- * @param iy the input's y dimensions
- * @param wx the weight's x dimension
- * @param wy the weight's y dimension
- * @param pad the amount of padding applied to input. (ix+2*pad is the x dimension of the input
- * @param stride NOP. TODO: implement stride. the stride between sliding windows
- * @return the count of all overlapping set bits between the two volumes.
- */
-void ai2_bin_conv2D(float *output, const BINARY_WORD *input, const BINARY_WORD *weights, int ix, int iy, int wx, int wy, int pad, int stride);
-
-/** @brief Performs a binary dot product (XNOR and POPCOUNT) for two equal sized volumes.
- *
- * @param a A 3D binary tensor
- * @param b A 3D binary tensor
- * @param vdim the dimensionality of the data. Note: we pack 32 elements in the Z element.
- * @return the count of all overlapping set bits between the two volumes.
- */
-int ai2_bin_dp(BINARY_WORD *a, BINARY_WORD *b, dim3 vdim);
-
-/** @brief Calculates the alpha plane given an alpha volume.
- *
- * Each point in the yz alpha plane
- * is the average sum of the absolute value of all elements in the z-direction.
- *
- * Pre-conditions:
- * alpha_volume is an array of size x*y*z.
- * alpha_plane is an array of size x*y.
- * alpha_volume (x,y,z) is transposed to (z,x,y).
- *
- * @param alpha_plane The 2D real-valued output plane
- * @param alpha_volume The 3D real-valued output volume
- * @param vdim the dimensionality of alpha_volume.
- */
-void ai2_calc_alpha(float *alpha_plane, float *alpha_volume, dim3 vdim);
-
-/** @brief Wrapper function for generating the beta scaling factor */
-void ai2_calc_beta(float *beta_plane, float *beta_volume, dim3 vdim);
-
-/** @brief Set the bit in a binary word */
-void ai2_bitset(BINARY_WORD *bword, unsigned int position);
-
-/** @brief Checks that the bit is set in a binary word */
-int ai2_is_set(BINARY_WORD bword, unsigned int position) ;
-
-/** @brief Converts a 3D float tensor into a 3D binary tensor.
- *
- * The value of the ith element in the binary tensor is the sign
- * of the ith element in the floating tensor.
- *
- * @param binary_vol the binary tensor
- * @param real_vol the real tensor
- * @param vdim the size of the 3D tensor
- */
-void ai2_flt_to_bin(BINARY_WORD *binary_vol, float *real_vol, dim3 vdim) ;
-
-/** @brief Converts a 3D binary tensor into a 3D float tensor.
- *
- * The ith float element will be '1' if the ith binary element is '1'.
- * Otherwise, the float element will be '-1'.
- *
- * @param real_vol the output real tensor
- * @param binary_vol the input binary tensor
- * @param vdim the dimension of both binary_vol and real_vol
- */
-void ai2_bin_to_flt(float *real_vol, BINARY_WORD *binary_vol, dim3 vdim);
-
-/** @brief Performs a pointwise matrix multication between two 2D tensors
- * @param output A 2D real-valued plane to store the outputs
- * @param input A 2D binary-valued plane that holds the inputs
- * @param N the number of elements between the arrays
- */
-void ai2_pointwise_mul_mm(float *output, const float *input, int N);
-
-/** @brief Performs a tiled pointwise matrix multiplication between two 2D tensors
- *
- * Pre-conditions: wx < ix, and wy < iy
- *
- * @param output A 2D real-valued plane of size ix, iy
- * @param alpha A 2D binary-valued plane of size wx, wy
- * @param ix the output's x dimension
- * @param iy the output's y dimensions
- * @param wx the alpha's x dimension
- * @param wy the alpha's y dimension
- * @param pad how many cells are padded, adds 2*pad to the borders of the image
- */
-void ai2_pointwise_mul_mm_2d(float *output, const float *alpha, int ix, int iy, int wx, int wy, int pad);
-
-// --------------------------------------
-// SETTER FUNCTIONS
-// --------------------------------------
-/** @brief Safe function to set the float input of a conv_layer
- */
-void ai2_setFltInput(ai2_bin_conv_layer *layer, float *new_input);
-
-/** @brief Safe function to set the binary input of a conv_layer
- */
-void ai2_setBinInput(ai2_bin_conv_layer *layer, BINARY_WORD *new_input);
-
-/** @brief Safe function to set the binary weights of a conv_layer
- */
-void ai2_setFltWeights(ai2_bin_conv_layer *layer, float *new_weights);
-
-/** @brief Safe function to set the binary weights of a conv_layer
- */
-void ai2_setBinWeights(ai2_bin_conv_layer *layer, BINARY_WORD *new_weights);
-
-/** @brief Safe function to set the binary outputs of a conv_layer
- */
-void ai2_setFltOutput(ai2_bin_conv_layer *layer, float *new_output);
-
-/** @brief Safe function to set the binary outputs of a conv_layer
- */
-void ai2_setBinOutput(ai2_bin_conv_layer *layer, BINARY_WORD *new_output);
-
-/** @brief Safe function to set the alpha of a conv_layer
- */
-void ai2_setFltAlpha(ai2_bin_conv_layer *layer, float *new_alpha);
-
-/** @brief Safe function to set the beta of a conv_layer
- */
-void ai2_setFltBeta(ai2_bin_conv_layer *layer, float *new_beta);
-
-/** @brief Safe function to set the new_beta of a conv_layer
- */
-void ai2_setFltNewBeta(ai2_bin_conv_layer *layer, float *new_new_beta);
-
-// --------------------------------------
-// GETTER FUNCTIONS
-// --------------------------------------
-/** @brief Safe function to get the float outputs of a conv_layer
- */
-float * ai2_getFltOutput(ai2_bin_conv_layer *layer);
-
-/** @brief 3D tranpose from (x,y,z) to (z,y,x)
- * @return a new pointer with the transposed matrix
- */
-void ai2_transpose3D(float *data, dim3 d);
-
-/** @brief Checks if a float is a whole number (e.g., an int)
- */
-int ai2_isFloatWhole(float f);
-
-/* @brief Allocates all memory objects in an ai2_bin_conv_layer
- * b - batches (number of filter batches)
- * c - input channels
- * ix - input width
- * iy - input height
- * wx - weight/filter width
- * wy - weight/filter height
- * s - stride between sliding windows
- * pad - the amount of padding
- */
-ai2_bin_conv_layer ai2_make_bin_conv_layer(int b, int c, int ix, int iy, int wx, int wy, int s, int pad);
-
-/* @brief Safe deallocation of all memory objects in an ai2_bin_conv_layer
- */
-void ai2_free_bin_conv_layer(ai2_bin_conv_layer *layer);
-
-/* @brief Given real-valued filter data and a conv layer, performs a forward pass
- */
-void ai2_bin_forward(ai2_bin_conv_layer *layer);
-
-#endif
diff --git a/src/common.c b/src/common.c
deleted file mode 100644
index 9d59ee8..0000000
--- a/src/common.c
+++ /dev/null
@@ -1,81 +0,0 @@
-#include "common.h"
-
-// Returns the time in ms
-double getElapsedTime(Timer *timer) {
- // Calculate time it took in seconds
- double accum_ms = ( timer->requestEnd.tv_sec - timer->requestStart.tv_sec )
- + ( timer->requestEnd.tv_nsec - timer->requestStart.tv_nsec )
- / 1e6;
- return accum_ms;
-}
-
-void start_timer(Timer *timer) {
- clock_gettime(CLOCK_MONOTONIC_RAW, &(timer->requestStart));
-}
-
-void stop_timer(Timer *timer) {
- clock_gettime(CLOCK_MONOTONIC_RAW, &(timer->requestEnd));
-}
-
-
-BINARY_WORD * mallocBinaryVolume(dim3 vol) {
- return (BINARY_WORD *) malloc (vol.x * vol.y * vol.z / BITS_PER_BINARY_WORD * sizeof(BINARY_WORD));
-}
-
-float * mallocFloatVolume(dim3 vol) {
- return (float *) malloc (vol.x * vol.y * vol.z * sizeof(float));
-}
-
-// Returns the size (in bytes) of a binary array with dimensions stored in conv_args
-double getSizeBytesBinaryArray(dim3 conv_args) {
- return conv_args.x * conv_args.y * conv_args.z * sizeof(BINARY_WORD) / (BITS_PER_BINARY_WORD);
-}
-
-
-ConvolutionArgs initArgs(size_t ix, size_t iy, size_t iz, size_t wx, size_t wy, size_t wz) {
- ConvolutionArgs conv_args;
- // Input Volume
- conv_args.input.x = ix; // x == y for a square face
- conv_args.input.y = iy;
- conv_args.input.z = iz;
- conv_args.weights.x = wx; // x == y for square face
- conv_args.weights.y = wy;
- conv_args.weights.z = wz;
-
- // <!-- DO NOT MODIFY -->
- // Intermediate Volumes
- conv_args.alpha_plane.x = conv_args.weights.x;
- conv_args.alpha_plane.y = conv_args.weights.y;
- conv_args.alpha_plane.z = 1;
-
- conv_args.beta_plane.x = 1;
- conv_args.beta_plane.y = conv_args.input.y;
- conv_args.beta_plane.z = conv_args.input.z;
-
- conv_args.gamma_plane.x = conv_args.input.x * conv_args.weights.x;
- conv_args.gamma_plane.y = conv_args.input.y * conv_args.weights.y;
- conv_args.gamma_plane.z = 1;
-
- conv_args.zeta_plane.x = conv_args.gamma_plane.x;
- conv_args.zeta_plane.y = conv_args.gamma_plane.y;
- conv_args.zeta_plane.z = 1;
-
- // Output Volume
- conv_args.output.x = conv_args.input.x;
- conv_args.output.y = conv_args.input.y;
- conv_args.output.z = 1; // Output should be a 2D plane
-
- // Verify dimensions
- //assert(conv_args.weights.x % 32 == 0); // must be divisble by 32 for efficient alignment to unsigned 32-bit ints
-// assert(conv_args.weights.y % 32 == 0); // must be divisble by 32 for efficient alignment to unsigned 32-bit ints
- assert(conv_args.weights.z % 32 == 0); // must be divisble by 32 for efficient alignment to unsigned 32-bit ints
- //assert(conv_args.input.x % 32 == 0); // must be divisble by 32 for efficient alignment to unsigned 32-bit ints
-// assert(conv_args.input.y % 32 == 0); // must be divisble by 32 for efficient alignment to unsigned 32-bit ints
- assert(conv_args.input.z % 32 == 0); // must be divisble by 32 for efficient alignment to unsigned 32-bit ints
- assert(conv_args.weights.x <= conv_args.input.x);
- assert(conv_args.weights.y <= conv_args.input.y);
- assert(conv_args.weights.z <= conv_args.input.z);
- // <!-- DO NOT MODIFY -->
-
- return conv_args;
-}
diff --git a/src/common.h b/src/common.h
deleted file mode 100644
index bad428d..0000000
--- a/src/common.h
+++ /dev/null
@@ -1,50 +0,0 @@
-#ifndef AI2_COMMON_H
-#define AI2_COMMON_H
-
-#include <time.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <inttypes.h>
-#include <assert.h>
-#include <limits.h>
-#include <tgmath.h>
-#include <unistd.h>
-#include <stdint.h>
-//#include <gperftools/profiler.h>
-#include <sys/time.h>
-
-typedef uint32_t BINARY_WORD;
-#define BITS_PER_BINARY_WORD (sizeof(BINARY_WORD) * CHAR_BIT)
-
-typedef struct{
- struct timespec requestStart;
- struct timespec requestEnd;
-} Timer;
-
-typedef struct {
- size_t x;
- size_t y;
- size_t z;
-} dim3;
-
-typedef struct {
- dim3 weights;
- dim3 input;
- dim3 output;
- dim3 alpha_plane;
- dim3 beta_plane;
- dim3 gamma_plane;
- dim3 zeta_plane;
-} ConvolutionArgs;
-
-// Timer stuff
-double getElapsedTime(Timer *timer); // Returns the time in ms
-void start_timer(Timer *timer);
-void stop_timer(Timer *timer);
-
-BINARY_WORD * mallocBinaryVolume(dim3 vol);
-float * mallocFloatVolume(dim3 vol);
-ConvolutionArgs initArgs(size_t ix, size_t iy, size_t iz, size_t wx, size_t wy, size_t wz);
-double getSizeBytesBinaryArray(dim3 conv_args);
-
-#endif
--
Gitblit v1.10.0