#include "common.h"

/*
 * Returns the time between the last start_timer() and stop_timer() calls on
 * `timer`, in milliseconds.
 *
 * Both timestamps are filled in by clock_gettime(), so tv_sec is in seconds
 * and tv_nsec is in nanoseconds; each part is scaled to milliseconds before
 * being summed. The nanosecond difference may be negative when the end
 * timestamp rolled over into a new second; the sum is still correct.
 */
double getElapsedTime(Timer *timer)
{
    double delta_sec =
        (double)(timer->requestEnd.tv_sec - timer->requestStart.tv_sec);
    double delta_nsec =
        (double)(timer->requestEnd.tv_nsec - timer->requestStart.tv_nsec);

    /* seconds -> ms is x1e3, nanoseconds -> ms is /1e6 */
    return delta_sec * 1e3 + delta_nsec / 1e6;
}

/* Records the current CLOCK_MONOTONIC_RAW time as the start timestamp. */
void start_timer(Timer *timer)
{
    clock_gettime(CLOCK_MONOTONIC_RAW, &(timer->requestStart));
}

/* Records the current CLOCK_MONOTONIC_RAW time as the end timestamp. */
void stop_timer(Timer *timer)
{
    clock_gettime(CLOCK_MONOTONIC_RAW, &(timer->requestEnd));
}

/*
 * Allocates an uninitialized bit-packed volume holding vol.x * vol.y * vol.z
 * bits, stored in BINARY_WORDs of BITS_PER_BINARY_WORD bits each.
 *
 * The word count is rounded up so that a volume whose bit count is not a
 * multiple of the word width still gets a final, partially used word instead
 * of being under-allocated.
 *
 * Returns the pointer from malloc() (NULL on failure); the caller releases it
 * with free().
 */
BINARY_WORD *mallocBinaryVolume(dim3 vol)
{
    /* Widen before multiplying so the bit count is computed in size_t. */
    size_t total_bits = (size_t)vol.x * (size_t)vol.y * (size_t)vol.z;
    size_t bits_per_word = (size_t)(BITS_PER_BINARY_WORD);
    size_t word_count = (total_bits + bits_per_word - 1) / bits_per_word;

    /* Cast kept so the file also builds when compiled as C++. */
    return (BINARY_WORD *) malloc(word_count * sizeof(BINARY_WORD));
}

/*
 * Allocates an uninitialized volume of vol.x * vol.y * vol.z floats.
 *
 * Returns the pointer from malloc() (NULL on failure); the caller releases it
 * with free().
 */
float *mallocFloatVolume(dim3 vol)
{
    /* Widen before multiplying so the element count is computed in size_t. */
    size_t element_count = (size_t)vol.x * (size_t)vol.y * (size_t)vol.z;

    return (float *) malloc(element_count * sizeof(float));
}

/*
 * Returns the size (in bytes) of a binary array with the dimensions stored in
 * conv_args: bits * sizeof(BINARY_WORD) / BITS_PER_BINARY_WORD.
 *
 * The division is an integer division, so the result is truncated when the
 * bit count does not fill a whole number of bytes.
 */
double getSizeBytesBinaryArray(dim3 conv_args)
{
    size_t total_bits =
        (size_t)conv_args.x * (size_t)conv_args.y * (size_t)conv_args.z;

    return total_bits * sizeof(BINARY_WORD) / (BITS_PER_BINARY_WORD);
}

/*
 * Builds a ConvolutionArgs from the input volume dimensions (ix, iy, iz) and
 * the weight volume dimensions (wx, wy, wz), deriving the dimensions of the
 * intermediate planes and of the output plane from them.
 *
 * Checked with assert() (so only in builds without NDEBUG):
 *   - wz and iz are multiples of 32;
 *   - the weight volume is no larger than the input volume in any dimension.
 */
ConvolutionArgs initArgs(size_t ix, size_t iy, size_t iz,
                         size_t wx, size_t wy, size_t wz)
{
    ConvolutionArgs conv_args;

    // Input volume
    conv_args.input.x = ix; // x == y for a square face
    conv_args.input.y = iy;
    conv_args.input.z = iz;

    // Weight volume
    conv_args.weights.x = wx; // x == y for a square face
    conv_args.weights.y = wy;
    conv_args.weights.z = wz;

    // Intermediate volumes
    conv_args.alpha_plane.x = conv_args.weights.x;
    conv_args.alpha_plane.y = conv_args.weights.y;
    conv_args.alpha_plane.z = 1;

    conv_args.beta_plane.x = 1;
    conv_args.beta_plane.y = conv_args.input.y;
    conv_args.beta_plane.z = conv_args.input.z;

    conv_args.gamma_plane.x = conv_args.input.x * conv_args.weights.x;
    conv_args.gamma_plane.y = conv_args.input.y * conv_args.weights.y;
    conv_args.gamma_plane.z = 1;

    conv_args.zeta_plane.x = conv_args.gamma_plane.x;
    conv_args.zeta_plane.y = conv_args.gamma_plane.y;
    conv_args.zeta_plane.z = 1;

    // Output volume
    conv_args.output.x = conv_args.input.x;
    conv_args.output.y = conv_args.input.y;
    conv_args.output.z = 1; // Output should be a 2D plane

    // Verify dimensions. Only the depth (z) of each volume must be divisible
    // by 32 for efficient alignment to unsigned 32-bit ints; the same check
    // on x and y is not enforced here.
    assert(conv_args.weights.z % 32 == 0);
    assert(conv_args.input.z % 32 == 0);

    // The weights must fit inside the input volume.
    assert(conv_args.weights.x <= conv_args.input.x);
    assert(conv_args.weights.y <= conv_args.input.y);
    assert(conv_args.weights.z <= conv_args.input.z);

    return conv_args;
}