/*
 * Benchmark driver: judging by the report printed at the end of main(), it
 * times a "dot product" and a "saxpy" variant of a computation on an N x N
 * matrix, each in a serial and a parallel version, and prints elapsed time,
 * MFlops, maximum deviation between the result vectors, and speed-up.
 *
 * NOTE(review): this copy of the file is damaged.  The header names of all
 * five #include directives are missing, and a large part of main() has been
 * lost (see the note inside main()).  The code below is kept exactly as
 * found; restore it from a clean copy before building.
 */

/*
 * NOTE(review): header names lost.  The code below calls printf, malloc,
 * exit, free, fmod, fabs and gettimeofday, so presumably <stdio.h>,
 * <stdlib.h>, <math.h> and <sys/time.h> were among them -- TODO confirm,
 * and identify the fifth header.
 */
#include
#include
#include
#include
#include

/* REP is not referenced in the visible code; presumably a repetition count
   for the timed kernels -- TODO confirm against a clean copy. */
#define REP 10
/* Matrix dimension; also the length of the vectors declared in main(). */
#define N 5000

/* Matrix A is stored column by column: */
/* A(i,j) expands to the ADDRESS a+i*N+j inside the flat array `a`, which
   must be in scope at the point of use.
   NOTE(review): the arguments are not parenthesised in the expansion, so
   pass only simple identifiers or constants. */
#define A(i,j) (a+i*N+j)
/* Matrix B is the transpose of A: */
/* B(i,j) addresses the same storage with the index roles swapped (a+j*N+i). */
#define B(i,j) (a+j*N+i)

/*
 * get_memory -- allocate an array of k doubles with malloc().
 *
 *   k        number of elements requested
 *   returns  pointer to the (uninitialised) array; ownership passes to the
 *            caller, who releases it with free()
 *
 * Never returns NULL: if malloc() fails, a message is printed to stdout and
 * the process is terminated with exit(-1).
 */
double* get_memory(int k)
{
  /* allocate memory for an array of length k */
  double *ptr;
  /* NOTE(review): k is an int that is converted to size_t for the
     multiplication; a negative k would become a huge request. */
  ptr = (double *) malloc(k*sizeof(double));
  if (ptr == NULL) {
    printf(" failure during memory allocation\n");
    exit(-1);
  }
  return ptr;
}

/*
 * ggl -- generate u(0,1) distributed random numbers.
 *
 *   ds       in/out: generator state (seed).  It is overwritten with the
 *            next state, so it must be saved between calls.
 *   returns  (t - 1) / (d2 - 1), where t is the new state and
 *            d2 = 2147483647
 *
 * The state is advanced with the recurrence t <- fmod(16807 * t, 2147483647),
 * carried out in double precision.
 *
 * ggl is essentially the same as the IMSL routine RNUM.
 * W. Petersen and M. Troyer, 24 Oct. 2002, ETHZ
 */
double ggl(double *ds)
{
  double t,d2=0.2147483647e10;   /* d2 = 2147483647, the modulus */
  t = *ds;
  t = fmod(0.16807e5*t,d2);      /* multiplier 0.16807e5 = 16807 */
  *ds = t;                       /* write the new state back for the next call */
  return((t-1.0e0)/(d2-1.0e0));
}

/*
 * walltime -- wall-clock time in seconds, relative to *t0.
 *
 *   t0       offset in seconds that is subtracted from the current reading
 *   returns  (seconds since the first call of walltime) - *t0
 *
 * The first call records the gettimeofday() reading in static variables as
 * the time base; every reading is taken relative to that base.  Passing a
 * pointer to 0.0 therefore yields the time since the first call, and passing
 * a pointer to an earlier return value yields the interval elapsed since
 * that earlier call.
 */
double walltime(double *t0)
{
  double mic, time;
  double mega=0.000001;          /* factor converting microseconds to seconds */
  struct timeval tp;
  struct timezone tzp;           /* filled in by gettimeofday() but never read */
  static long base_sec = 0;      /* time base, set on the first call */
  static long base_usec = 0;
  (void) gettimeofday(&tp, &tzp);   /* return value deliberately ignored */
  /* NOTE(review): base_sec == 0 doubles as the "not initialised yet" flag. */
  if (base_sec == 0) {
    base_sec = tp.tv_sec;
    base_usec = tp.tv_usec;
  }
  time = (double)(tp.tv_sec - base_sec);
  mic = (double)(tp.tv_usec - base_usec);   /* may be negative; the sum below is still correct */
  time = (time + mic * mega) - *t0;
  return(time);
}

/*
 * main -- allocate the n x n matrix, run the benchmark and print the report.
 *
 * Only the declarations, the matrix allocation, the tail of the error
 * computation and the report survive in this copy (see the note below).
 */
int main()
{
  /* j, r, seed, t, clockZero, xj, x and z are not referenced in the visible
     code; presumably they are used in the lost part -- TODO confirm. */
  int i,j,r;
  int n = N;
  static double seed=331.0;
  double error12=0.0,error34=0.0;   /* running maxima of |y1-y2| and |y3-y4| */
  double big;
  double t,clockZero=0.0;
  double t_dot, t_dot_par, t_saxpy, t_saxpy_par;   /* timings used in the report */
  double xj;
  double *a=NULL;
  double x[N],y1[N],y2[N],y3[N],y4[N];
  double z[N]; /* will be needed for parallel version of the saxpy variant */

  a = get_memory(n*n);   /* flat storage for the n x n matrix */

  /*
   * NOTE(review): the text between "for(i=0;i" and "error12)?big:error12;"
   * is missing from this copy.  Nothing visible assigns y1..y4 or the four
   * t_* timings, so the initialisation and the timed kernels were presumably
   * in the lost part.  The statement below is the fused remainder and is not
   * valid C; restore from a clean copy.
   */
  for(i=0;ierror12)?big:error12;
    big = fabs(y3[i]-y4[i]);
    error34 = (big>error34)?big:error34;   /* keep the largest |y3[i]-y4[i]| */
  }

  /* Report.  Times are printed in milliseconds (1000.0 * t), the rate as
     n*n / t / 1e6, and the speed-up as serial time / parallel time. */
  printf("\n");
  printf(" matrix dimension: %d x %d\n",n,n);
  printf(" DOT PRODUCT VARIANT:\n");
  printf(" elapsed time for y1 (serial version) = %.3f msec\n",1000.0*t_dot);
  printf(" speed of serial version = %.2f MFlops\n",n*n/t_dot/1000000.0);
  printf(" elapsed time for y2 (parallel version) = %.3f msec\n",1000.0*t_dot_par);
  printf(" speed of parallel version = %.2f MFlops\n",n*n/t_dot_par/1000000.0);
  /* NOTE(review): the next string literal contains a raw line break in this
     copy, which is not valid C; presumably an artefact of line wrapping.
     It is left untouched here because it is runtime text. */
  printf(" max error between 
y1 and y2 = %.8e\n",error12);
  printf(" speed-up = %.1f\n",t_dot/t_dot_par);
  printf(" SAXPY VARIANT:\n");
  printf(" elapsed time for y3 (serial version) = %.3f msec\n",1000.0*t_saxpy);
  printf(" speed of serial version = %.2f MFlops\n",n*n/t_saxpy/1000000.0);
  printf(" elapsed time for y4 (parallel version) = %.3f msec\n",1000.0*t_saxpy_par);
  printf(" speed of parallel version = %.2f MFlops\n",n*n/t_saxpy_par/1000000.0);
  printf(" max error between y3 and y4 = %.8e\n",error34);
  printf(" speed-up = %.1f\n",t_saxpy/t_saxpy_par);
  printf("\n");

  free(a);   /* release the matrix obtained from get_memory() */
}