!this program is the kernel for an optical propagation program. !it does a series of 2d ffts followed by a multiplication. !it is modeled after the AFWL program HELP or High Energy !Laser Propagation. ! !it does the 2d fft by first doing a collection of 1d ffts !then a transpose followed by a second collection of 1d ffts. ! !the sections of the program that are commented out represent !different ways of doing the operations. the most interesting !addition is using the using the subroutine shuff to generate !a nonuniform ordering for accessing the array. !the routines four1 was taken from the book !"numerical recipes in fortran, 1st addition." !however, the authors of that book derived their routine !from the routine fourn that was in the AFWL program HELP. !the original routine, fourn, contained many additional !options !disclaimer: for a production code you would most likely !use a vendor supplied library to do the fft instead of a !hand written one. module ccm_numz ! basic real types integer, parameter:: b8 = selected_real_kind(10) contains function ccm_time() implicit none integer i integer :: ccm_start_time(8) = (/(-100,i=1,8)/) real(b8) :: ccm_time,tmp integer,parameter :: norm(13)=(/ & 0, 2678400, 5097600, 7776000,10368000,13046400,& 15638400,18316800,20995200,23587200,26265600,28857600,31536000/) integer,parameter :: leap(13)=(/ & 0, 2678400, 5184000, 7862400,10454400,13132800,& 15724800,18403200,21081600,23673600,26352000,28944000,31622400/) integer :: values(8),m,sec save call date_and_time(values=values) if(mod(values(1),4) .eq. 0)then m=leap(values(2)) else m=norm(values(2)) endif sec=((values(3)*24+values(5))*60+values(6))*60+values(7) tmp=real(m,b8)+real(sec,b8)+real(values(8),b8)/1000.0_b8 !write(*,*)"vals ",values if(values(1) .ne. ccm_start_time(1))then if(mod(ccm_start_time(1),4) .eq. 0)then tmp=tmp+real(leap(13),b8) else tmp=tmp+real(norm(13),b8) endif endif ccm_time=tmp end function end module ccm_numz program two_d_fft use ccm_numz implicit none ! integer size integer,parameter:: size=1024 ! integer omp_get_max_threads integer i,j,k,ijk,isign,iseed real(b8),allocatable:: x(:) integer, allocatable:: index(:) complex(b8), allocatable:: a(:,:) ! complex(b8) :: a(size,size) ! complex(b8), allocatable:: temp(:) complex(b8) tmp complex(b8) factor real(b8) gen,fft1,fft2,trans,totf,fact real(b8) t0,t1,t2,t3,t4,t5 integer OMP_GET_MAX_THREADS interface subroutine shuff(index,m,n) dimension index(:) integer m,n end subroutine end interface factor=size factor=1.0_b8/(factor) iseed=-12345 isign=1 gen=0 fft1=0 fft2=0 trans=0 totf=0 fact=0 ! read(12,*)size allocate(a(size,size)) allocate(x(size),index(size)) call shuff(index,size,8) ! dummy=ran1(iseed) ! write(*,*)"dummy=",dummy t0=ccm_time () do j=1,size call random_number(x) do i=1,size a(i,j)=cmplx(x(i),0.0_b8) enddo enddo write(*,'(("(",g20.10,",",g20.10,")"))')a(size/2+1,size-2) ! do 10 ijk=1,20 ! change to 4 to run faster do 10 ijk=1,20 t1=ccm_time () !$OMP PARALLEL DO SCHEDULE (RUNTIME) do i=1,size call four1(a(:,i),size,isign) !call four1(a(i,:),size,isign) enddo !$OMP END PARALLEL DO t2=ccm_time () !$OMP PARALLEL DO SCHEDULE (RUNTIME) PRIVATE(i,j,k,tmp) do k=1,size i=k ! i=index(k) do j=i,size ! tmp=a(j,i) ! a(j,i)=a(i,j) ! a(i,j)=tmp tmp=a(i,j) a(i,j)=a(j,i) a(j,i)=tmp enddo ! j=i ! temp(j:size)=a(i,j:size) ! a(i,j:size)=a(j:size,i) ! a(j:size,i)=temp(j:size) enddo !$OMP END PARALLEL DO t3=ccm_time () !$OMP PARALLEL DO SCHEDULE (RUNTIME) do i=1,size call four1(a(:,i),size,isign) !call four1(a(i,:),size,isign) enddo !$OMP END PARALLEL DO t4=ccm_time () !$OMP PARALLEL DO SCHEDULE (RUNTIME) do j=1,size do i=1,size a(i,j)=factor*a(i,j) enddo enddo !$OMP END PARALLEL DO t5=ccm_time () gen=gen+t1-t0 fft1=fft1+t2-t1 fft2=fft2+t4-t3 trans=trans+t3-t2 totf=totf+t5-t1 fact=fact+t5-t4 isign=isign*(-1) write(*,'(i3)',advance="no")ijk 10 continue write(*,*) write(*,'(("(",g20.10,",",g20.10,")"))')a(size/2+1,size-2) write(*,*)"number of transforms",ijk-1 !write(*,'("generation time= ",f7.1)')gen write(*,'(" fft1 time= ",f9.4)')fft1 write(*,'(" transpose time= ",f9.4)')trans write(*,'(" fft2 time= ",f9.4)')fft2 write(*,'(" scaling time= ",f9.4)')fact write(*,'(" total time = ",f9.4)',advance="no")totf write(*,'(" for matrix of size",i6)')size write(*,*)"THREADS = ",OMP_GET_MAX_THREADS() stop end program two_d_fft subroutine four1(data,nn,isign) use ccm_numz implicit none integer i,j,isign,nn,n,m,mmax,istep real(b8), parameter :: two_pi = 6.283185307179586477_b8 real(b8) wr,wi,wpr,wpi,wtemp,theta,tempr,tempi real(b8) data(16384) n=2*nn j=1 do 11 i=1,n,2 if(j.gt.i)then tempr=data(j) tempi=data(j+1) data(j)=data(i) data(j+1)=data(i+1) data(i)=tempr data(i+1)=tempi endif m=n/2 1 if ((m.ge.2).and.(j.gt.m)) then j=j-m m=m/2 go to 1 endif j=j+m 11 continue mmax=2 2 if (n.gt.mmax) then istep=2*mmax theta=two_pi/(isign*mmax) wpr=-2.0_b8*sin(0.5_b8*theta)**2 wpi=sin(theta) wr=1.0_b8 wi=0.0_b8 do 13 m=1,mmax,2 do 12 i=m,n,istep j=i+mmax tempr=(wr)*data(j)-(wi)*data(j+1) tempi=(wr)*data(j+1)+(wi)*data(j) data(j)=data(i)-tempr data(j+1)=data(i+1)-tempi data(i)=data(i)+tempr data(i+1)=data(i+1)+tempi 12 continue wtemp=wr wr=wr*wpr-wi*wpi+wr wi=wi*wpr+wtemp*wpi+wi 13 continue mmax=istep go to 2 endif return end subroutine shuff(index,m,n) integer tens,ones dimension index(:) tens=n ones=1 j=0 k=1 do while (j < m) j=j+1 if(k.gt. m)then write(*,*)k,m stop endif index(k)=j k=k+tens if(k .gt. m)then ones=ones+1 k=ones endif enddo end subroutine shuff