#define FLT double
#define INT int

#include "mpi.h"
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <time.h>
#if macintosh
#include <console.h>
#endif

FLT **matrix(INT nrl, INT nrh, INT ncl, INT nch);
FLT *vector(INT nl, INT nh);
INT *ivector(INT nl, INT nh);
INT mint(FLT x);
FLT walltime();
void bc(FLT **psi, INT i1, INT i2, INT j1, INT j2);
void do_jacobi(FLT **psi, FLT **new_psi, FLT *diff, INT i1, INT i2, INT j1, INT j2);
void write_grid(FLT **psi, INT i1, INT i2, INT j1, INT j2);
void do_transfer(FLT **psi, INT i1, INT i2, INT j1, INT j2);
void do_force(INT i1, INT i2, INT j1, INT j2);
char *unique(char *name);
FLT force(FLT y);

#define pi 3.14159265358979323846

FLT **the_for;                      /* forcing term for each interior cell */
FLT dx, dy, a1, a2, a3, a4, a5, a6;
INT nx, ny;
FLT alpha;
FLT *svec1, *svec2, *rvec1, *rvec2; /* send/receive buffers for ghost columns */
INT numnodes, myid, mpi_err;
#define mpi_master 0
INT nrow, ncol;
INT myrow, mycol;
INT myid_col, myid_row, nodes_row, nodes_col;
MPI_Status status;
MPI_Comm ROW_COMM, COL_COMM;
INT mytop, mybot, myleft, myright;
MPI_Status stat_ray[8];
MPI_Request req[8];

int main(int argc, char **argv)
{
    FLT lx, ly, beta, gamma;
    INT steps;
    FLT t1, t2;
    FLT mydiff, diff;
    FLT dx2, dy2, bottom;
    FLT di, dj;
    FLT **psi;      /* our calculation grid */
    FLT **new_psi;  /* temp storage for the grid */
    INT i, j, i1, i2, j1, j2;
    INT iout;
#if macintosh
    argc = ccommand(&argv);
#endif
    mpi_err = MPI_Init(&argc, &argv);
    mpi_err = MPI_Comm_size(MPI_COMM_WORLD, &numnodes);
    mpi_err = MPI_Comm_rank(MPI_COMM_WORLD, &myid);
/*
 ! find a reasonable grid topology based on the number
 ! of processors
*/
    nrow = mint(sqrt((FLT)numnodes));
    ncol = numnodes / nrow;
    while (nrow * ncol != numnodes) {
        nrow = nrow + 1;
        ncol = numnodes / nrow;
    }
    if (nrow > ncol) {
        i = ncol;
        ncol = nrow;
        nrow = i;
    }
    myrow = myid / ncol + 1;
    mycol = myid - (myrow - 1) * ncol + 1;
    if (myid == mpi_master)
        printf(" nrow= %d ncol= %d\n", nrow, ncol);
/*
 ! make the row and col communicators
 ! all processors with the same row will be in the same ROW_COMM
*/
    mpi_err = MPI_Comm_split(MPI_COMM_WORLD, myrow, mycol, &ROW_COMM);
    mpi_err = MPI_Comm_rank(ROW_COMM, &myid_row);
    mpi_err = MPI_Comm_size(ROW_COMM, &nodes_row);
/*
 ! all processors with the same col will be in the same COL_COMM
*/
    mpi_err = MPI_Comm_split(MPI_COMM_WORLD, mycol, myrow, &COL_COMM);
    mpi_err = MPI_Comm_rank(COL_COMM, &myid_col);
    mpi_err = MPI_Comm_size(COL_COMM, &nodes_col);
/*
 ! find the ids of neighbors using the communicators created above
*/
    mytop   = myid_col - 1;  if (mytop   < 0)          mytop   = MPI_PROC_NULL;
    mybot   = myid_col + 1;  if (mybot   == nodes_col) mybot   = MPI_PROC_NULL;
    myleft  = myid_row - 1;  if (myleft  < 0)          myleft  = MPI_PROC_NULL;
    myright = myid_row + 1;  if (myright == nodes_row) myright = MPI_PROC_NULL;
    if (myid == mpi_master) {
        scanf("%d %d", &nx, &ny);
        scanf("%lg %lg", &lx, &ly);
        scanf("%lg %lg %lg", &alpha, &beta, &gamma);
        scanf("%d", &steps);
        printf("%d %d\n", nx, ny);
        printf("%g %g\n", lx, ly);
        printf("%g %g %g\n", alpha, beta, gamma);
        printf("%d\n", steps);
    }
    mpi_err = MPI_Bcast(&nx,    1, MPI_INT,    mpi_master, MPI_COMM_WORLD);
    mpi_err = MPI_Bcast(&ny,    1, MPI_INT,    mpi_master, MPI_COMM_WORLD);
    mpi_err = MPI_Bcast(&steps, 1, MPI_INT,    mpi_master, MPI_COMM_WORLD);
    mpi_err = MPI_Bcast(&lx,    1, MPI_DOUBLE, mpi_master, MPI_COMM_WORLD);
    mpi_err = MPI_Bcast(&ly,    1, MPI_DOUBLE, mpi_master, MPI_COMM_WORLD);
    mpi_err = MPI_Bcast(&alpha, 1, MPI_DOUBLE, mpi_master, MPI_COMM_WORLD);
    mpi_err = MPI_Bcast(&beta,  1, MPI_DOUBLE, mpi_master, MPI_COMM_WORLD);
    mpi_err = MPI_Bcast(&gamma, 1, MPI_DOUBLE, mpi_master, MPI_COMM_WORLD);
/* calculate the constants for the calculations */
    dx = lx / (nx + 1);
    dy = ly / (ny + 1);
    dx2 = dx * dx;
    dy2 = dy * dy;
    bottom = 2.0 * (dx2 + dy2);
    a1 = (dy2 / bottom) + (beta * dx2 * dy2) / (2.0 * gamma * dx * bottom);
    a2 = (dy2 / bottom) - (beta * dx2 * dy2) / (2.0 * gamma * dx * bottom);
    a3 = dx2 / bottom;
    a4 = dx2 / bottom;
    a5 = dx2 * dy2 / (gamma * bottom);
    a6 = pi / ly;
/* set the indices for the interior of the grid */
    dj = (FLT)ny / (FLT)nodes_row;
    j1 = mint(1.0 + myid_row * dj);
    j2 = mint(1.0 + (myid_row + 1) * dj) - 1;
    di = (FLT)nx / (FLT)nodes_col;
    i1 = mint(1.0 + myid_col * di);
    i2 = mint(1.0 + (myid_col + 1) * di) - 1;
    if (myid == mpi_master)
        printf("nodes_row= %d nodes_col= %d\n", nodes_row, nodes_col);
    printf("myid= %d myrow= %d mycol= %d\n", myid, myrow, mycol);
    printf("myid= %d myid_row= %d myid_col= %d\n", myid, myid_row, myid_col);
    printf("myid= %d holds [%d:%d][%d:%d]\n", myid, i1, i2, j1, j2);
/* allocate the grid to (i1-1:i2+1,j1-1:j2+1); this includes boundary cells */
    psi     = matrix((INT)(i1-1), (INT)(i2+1), (INT)(j1-1), (INT)(j2+1));
    new_psi = matrix((INT)(i1-1), (INT)(i2+1), (INT)(j1-1), (INT)(j2+1));
    the_for = matrix((INT)(i1-1), (INT)(i2+1), (INT)(j1-1), (INT)(j2+1));
    svec1 = vector((INT)(i1-1), (INT)(i2+1));
    svec2 = vector((INT)(i1-1), (INT)(i2+1));
    rvec1 = vector((INT)(i1-1), (INT)(i2+1));
    rvec2 = vector((INT)(i1-1), (INT)(i2+1));
/* set initial guess for the value of the grid */
    for (i = i1-1; i <= i2+1; i++)
        for (j = j1-1; j <= j2+1; j++)
            psi[i][j] = 1.0;
/* set boundary conditions */
    bc(psi, i1, i2, j1, j2);
    do_force(i1, i2, j1, j2);
/* do the Jacobi iterations */
    t1 = MPI_Wtime();
    iout = steps / 100;
    if (iout == 0) iout = 1;
    if (steps > 0) {
        for (i = 1; i <= steps; i++) {
            do_jacobi(psi, new_psi, &mydiff, i1, i2, j1, j2);
            do_transfer(psi, i1, i2, j1, j2);
            mpi_err = MPI_Reduce(&mydiff, &diff, 1, MPI_DOUBLE, MPI_SUM,
                                 mpi_master, MPI_COMM_WORLD);
            if (myid == mpi_master && i % iout == 0)
                printf("%8d %15.5f\n", i, diff);
        }
    }
    t2 = MPI_Wtime();
    if (myid == mpi_master) printf("run time = %10.3g\n", t2 - t1);
    write_grid(psi, i1, i2, j1, j2);
    mpi_err = MPI_Finalize();
    return 0;
}

void bc(FLT **psi, INT i1, INT i2, INT j1, INT j2)
{
/* sets the boundary conditions */
/* input is the grid and the indices for the interior cells */
    INT i, j;
/* do the top edge */
    if (i1 == 1) {
        for (j = j1-1; j <= j2+1; j++)
            psi[i1-1][j] = 0.0;
    }
/* do the bottom edge */
    if (i2 == nx) {
        for (j = j1-1; j <= j2+1; j++)
            psi[i2+1][j] = 0.0;
    }
/* do the left edge */
    if (j1 == 1) {
        for (i = i1-1; i <= i2+1; i++)
            psi[i][j1-1] = 0.0;
    }
/* do the right edge */
    if (j2 == ny) {
        for (i = i1-1; i <= i2+1; i++)
            psi[i][j2+1] = 0.0;
    }
}

void do_jacobi(FLT **psi, FLT **new_psi, FLT *diff, INT i1, INT i2, INT j1, INT j2)
{
/*
 ! does a single Jacobi iteration step
 ! input is the grid and the indices for the interior cells
 ! new_psi is temp storage for the updated grid
 ! output is the updated grid in psi and diff, which is
 ! the sum of the absolute differences between the old and new grids
*/
    INT i, j;
    *diff = 0.0;
    for (i = i1; i <= i2; i++) {
        for (j = j1; j <= j2; j++) {
            new_psi[i][j] = a1*psi[i+1][j] + a2*psi[i-1][j]
                          + a3*psi[i][j+1] + a4*psi[i][j-1]
                          - a5*the_for[i][j];
            *diff = *diff + fabs(new_psi[i][j] - psi[i][j]);
        }
    }
    for (i = i1; i <= i2; i++)
        for (j = j1; j <= j2; j++)
            psi[i][j] = new_psi[i][j];
}

void do_force(INT i1, INT i2, INT j1, INT j2)
{
/*
 ! sets the force term for the interior cells
 ! input is the indices for the interior cells
*/
    FLT y;
    INT i, j;
    for (i = i1; i <= i2; i++) {
        for (j = j1; j <= j2; j++) {
            y = j * dy;
            the_for[i][j] = force(y);
        }
    }
}

FLT force(FLT y)
{
    return (-alpha * sin(y * a6));
}

/*
  The routines matrix, ivector and vector were adapted from
  Numerical Recipes in C: The Art of Scientific Computing,
  Press, Flannery, Teukolsky, Vetterling,
  Cambridge University Press, 1988.
*/
FLT **matrix(INT nrl, INT nrh, INT ncl, INT nch)
{
    INT i;
    FLT **m;
    /* allocate the array of row pointers */
    m = (FLT **)malloc((unsigned)(nrh - nrl + 1) * sizeof(FLT *));
    if (!m) {
        printf("allocation failure 1 in matrix()\n");
        exit(1);
    }
    m -= nrl;
    /* allocate one contiguous block and point each row into it */
    for (i = nrl; i <= nrh; i++) {
        if (i == nrl) {
            m[i] = (FLT *)malloc((unsigned)(nrh - nrl + 1) * (nch - ncl + 1) * sizeof(FLT));
            if (!m[i]) {
                printf("allocation failure 2 in matrix()\n");
                exit(1);
            }
            m[i] -= ncl;
        } else {
            m[i] = m[i - 1] + (nch - ncl + 1);
        }
    }
    return m;
}

INT *ivector(INT nl, INT nh)
{
    INT *v;
    v = (INT *)malloc((unsigned)(nh - nl + 1) * sizeof(INT));
    if (!v) {
        printf("allocation failure in ivector()\n");
        exit(1);
    }
    return v - nl;
}

FLT *vector(INT nl, INT nh)
{
    FLT *v;
    v = (FLT *)malloc((unsigned)(nh - nl + 1) * sizeof(FLT));
    if (!v) {
        printf("allocation failure in vector()\n");
        exit(1);
    }
    return v - nl;
}

void do_transfer(FLT **psi, INT i1, INT i2, INT j1, INT j2)
{
/* exchange ghost cells with the four neighbors */
    INT num_x, num_y;
    INT ireq;
    INT i;
    num_x = i2 - i1 + 3;
    num_y = j2 - j1 + 3;
    ireq = 0;
    /* pack the first and last interior columns into send buffers */
    for (i = i1-1; i <= i2+1; i++) {
        svec1[i] = psi[i][j1];
        svec2[i] = psi[i][j2];
    }
    if ((myid_col % 2) == 0) {
        /* send to left */
        mpi_err = MPI_Isend(&svec1[i1-1], num_x, MPI_DOUBLE, myleft,  100, ROW_COMM, &req[ireq]); ireq++;
        /* receive from left */
        mpi_err = MPI_Irecv(&rvec1[i1-1], num_x, MPI_DOUBLE, myleft,  100, ROW_COMM, &req[ireq]); ireq++;
        /* receive from right */
        mpi_err = MPI_Irecv(&rvec2[i1-1], num_x, MPI_DOUBLE, myright, 100, ROW_COMM, &req[ireq]); ireq++;
        /* send to right */
        mpi_err = MPI_Isend(&svec2[i1-1], num_x, MPI_DOUBLE, myright, 100, ROW_COMM, &req[ireq]); ireq++;
    } else {  /* we are on an odd col processor */
        /* receive from right */
        mpi_err = MPI_Irecv(&rvec2[i1-1], num_x, MPI_DOUBLE, myright, 100, ROW_COMM, &req[ireq]); ireq++;
        /* send to right */
        mpi_err = MPI_Isend(&svec2[i1-1], num_x, MPI_DOUBLE, myright, 100, ROW_COMM, &req[ireq]); ireq++;
        /* send to left */
        mpi_err = MPI_Isend(&svec1[i1-1], num_x, MPI_DOUBLE, myleft,  100, ROW_COMM, &req[ireq]); ireq++;
        /* receive from left */
        mpi_err = MPI_Irecv(&rvec1[i1-1], num_x, MPI_DOUBLE, myleft,  100, ROW_COMM, &req[ireq]); ireq++;
    }
    if ((myid_row % 2) == 0) {
        /* send to top */
        mpi_err = MPI_Isend(&psi[i1][j1-1],   num_y, MPI_DOUBLE, mytop, 10, COL_COMM, &req[ireq]); ireq++;
        /* receive from top */
        mpi_err = MPI_Irecv(&psi[i1-1][j1-1], num_y, MPI_DOUBLE, mytop, 10, COL_COMM, &req[ireq]); ireq++;
        /* receive from bottom */
        mpi_err = MPI_Irecv(&psi[i2+1][j1-1], num_y, MPI_DOUBLE, mybot, 10, COL_COMM, &req[ireq]); ireq++;
        /* send to bottom */
        mpi_err = MPI_Isend(&psi[i2][j1-1],   num_y, MPI_DOUBLE, mybot, 10, COL_COMM, &req[ireq]); ireq++;
    } else {
        /* receive from bottom */
        mpi_err = MPI_Irecv(&psi[i2+1][j1-1], num_y, MPI_DOUBLE, mybot, 10, COL_COMM, &req[ireq]); ireq++;
        /* send to bottom */
        mpi_err = MPI_Isend(&psi[i2][j1-1],   num_y, MPI_DOUBLE, mybot, 10, COL_COMM, &req[ireq]); ireq++;
        /* send to top */
        mpi_err = MPI_Isend(&psi[i1][j1-1],   num_y, MPI_DOUBLE, mytop, 10, COL_COMM, &req[ireq]); ireq++;
        /* receive from top */
        mpi_err = MPI_Irecv(&psi[i1-1][j1-1], num_y, MPI_DOUBLE, mytop, 10, COL_COMM, &req[ireq]); ireq++;
    }
    MPI_Waitall(ireq, req, stat_ray);
    /* unpack the received ghost columns */
    if (myleft != MPI_PROC_NULL) {
        for (i = i1-1; i <= i2+1; i++)
            psi[i][j1-1] = rvec1[i];
    }
    if (myright != MPI_PROC_NULL) {
        for (i = i1-1; i <= i2+1; i++)
            psi[i][j2+1] = rvec2[i];
    }
}

char *unique(char *name)
{
/* append the zero-padded process id to name to build a unique file name */
    static char unique_str[40];
    int i;
    for (i = 0; i < 40; i++)
        unique_str[i] = (char)0;
    if (myid > 99) {
        sprintf(unique_str, "%s%d", name, myid);
    } else {
        if (myid > 9)
            sprintf(unique_str, "%s0%d", name, myid);
        else
            sprintf(unique_str, "%s00%d", name, myid);
    }
    return unique_str;
}

void write_grid(FLT **psi, INT i1, INT i2, INT j1, INT j2)
{
/*
 ! input is the grid and the indices for the interior cells
 !
 ! the master prints the whole grid a line at a time.
 ! for a given line, each processor checks to see if
 ! it holds part of that line and then sends the
 ! number of cells held using MPI_Gather.  MPI_Gatherv
 ! is then used to send the data.
*/
    INT i, j, k, i0, j0, i3, j3;
    INT istart, iend, jstart, jend;
    INT *counts, *offsets;
    INT dj, mystart, myend, icol;
    FLT *arow;
    FILE *f18;
    istart = i1;
    iend = i2;
    jstart = j1;
    jend = j2;
    if (istart == 1) istart = 0;
    if (jstart == 1) jstart = 0;
    if (iend == nx) iend = nx + 1;
    if (jend == ny) jend = ny + 1;
    i0 = 0;
    j0 = 0;
    i3 = nx + 1;
    j3 = ny + 1;
    if (myid == mpi_master) {
        f18 = fopen(unique("out4c_"), "w");
        fprintf(f18, "%6d %6d\n", i3 - i0 + 1, j3 - j0 + 1);
        arow = vector(j0, j3);
        offsets = ivector((INT)0, (INT)(numnodes - 1));
        counts  = ivector((INT)0, (INT)(numnodes - 1));
        offsets[0] = 0;
    }
    for (i = i0; i <= i3; i++) {
        if (i >= istart && i <= iend) {
            dj = jend - jstart + 1;
            mystart = jstart;
            myend = jend;
            icol = i;
        } else {
            dj = 0;
            mystart = jstart;
            myend = jstart;
            icol = istart;
        }
        mpi_err = MPI_Gather(&dj, 1, MPI_INT, counts, 1, MPI_INT,
                             mpi_master, MPI_COMM_WORLD);
        if (myid == mpi_master) {
            for (k = 1; k <= numnodes - 1; k++)
                offsets[k] = counts[k - 1] + offsets[k - 1];
        }
        mpi_err = MPI_Gatherv(&psi[icol][mystart], dj, MPI_DOUBLE,
                              arow, counts, offsets, MPI_DOUBLE,
                              mpi_master, MPI_COMM_WORLD);
        if (myid == mpi_master) {
            for (j = j0; j <= j3; j++) {
                fprintf(f18, "%14.7g", arow[j]);
                if (j != j3) fprintf(f18, " ");
            }
            fprintf(f18, "\n");
        }
    }
    if (myid == mpi_master) fclose(f18);
}

INT mint(FLT x)
{
/* round x to the nearest integer */
    FLT y;
    INT j;
    j = (INT)x;
    y = (FLT)j;
    if (x - y >= 0.5) j++;
    return j;
}

FLT walltime()
{
    return ((FLT)clock() / ((FLT)CLOCKS_PER_SEC));
}
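
/*
  Build-and-run sketch.  The source file name, launcher invocation, and the
  sample input values below are illustrative assumptions, not part of the
  original code.  The master process reads four lines from standard input:
  "nx ny", "lx ly", "alpha beta gamma", and "steps".

      mpicc stommel.c -o stommel -lm
      mpirun -np 4 ./stommel < stommel.in

  where stommel.in might contain, for example:

      200 200
      2000000 2000000
      1.0e-9 2.25e-11 3.0e-6
      5000

  The master writes the gathered grid to a file named out4c_ followed by its
  zero-padded rank (out4c_000 when run as above).
*/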