[cig-commits] r15046 - seismo/3D/SPECFEM3D_SESAME/trunk
dkomati1 at geodynamics.org
dkomati1 at geodynamics.org
Mon May 25 09:57:09 PDT 2009
Author: dkomati1
Date: 2009-05-25 09:57:09 -0700 (Mon, 25 May 2009)
New Revision: 15046
Modified:
seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces_with_Deville.f90
seismo/3D/SPECFEM3D_SESAME/trunk/flags.guess
Log:
manually inlined the Deville et al. (2002) products to avoid calling some functions
(which some compilers could fail to inline)
Modified: seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces_with_Deville.f90
===================================================================
--- seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces_with_Deville.f90 2009-05-25 16:37:44 UTC (rev 15045)
+++ seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces_with_Deville.f90 2009-05-25 16:57:09 UTC (rev 15046)
@@ -84,6 +84,35 @@
real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
real(kind=CUSTOM_REAL) kappal
+! manually inline the calls to the Deville et al. (2002) routines
+ real(kind=4), dimension(NGLLX,m2) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
+ real(kind=4), dimension(m1,m2) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
+ real(kind=4), dimension(m1,m2) :: E1_m1_m2_5points,E2_m1_m2_5points,E3_m1_m2_5points
+
+ equivalence(dummyx_loc,B1_m1_m2_5points)
+ equivalence(dummyy_loc,B2_m1_m2_5points)
+ equivalence(dummyz_loc,B3_m1_m2_5points)
+ equivalence(tempx1,C1_m1_m2_5points)
+ equivalence(tempy1,C2_m1_m2_5points)
+ equivalence(tempz1,C3_m1_m2_5points)
+ equivalence(newtempx1,E1_m1_m2_5points)
+ equivalence(newtempy1,E2_m1_m2_5points)
+ equivalence(newtempz1,E3_m1_m2_5points)
+
+ real(kind=4), dimension(m2,NGLLX) :: A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
+ real(kind=4), dimension(m2,m1) :: C1_mxm_m2_m1_5points,C2_mxm_m2_m1_5points,C3_mxm_m2_m1_5points
+ real(kind=4), dimension(m2,m1) :: E1_mxm_m2_m1_5points,E2_mxm_m2_m1_5points,E3_mxm_m2_m1_5points
+
+ equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
+ equivalence(dummyy_loc,A2_mxm_m2_m1_5points)
+ equivalence(dummyz_loc,A3_mxm_m2_m1_5points)
+ equivalence(tempx3,C1_mxm_m2_m1_5points)
+ equivalence(tempy3,C2_mxm_m2_m1_5points)
+ equivalence(tempz3,C3_mxm_m2_m1_5points)
+ equivalence(newtempx3,E1_mxm_m2_m1_5points)
+ equivalence(newtempy3,E2_mxm_m2_m1_5points)
+ equivalence(newtempz3,E3_mxm_m2_m1_5points)
+
integer :: isource
double precision :: t0,f0
@@ -102,18 +131,82 @@
enddo
enddo
-!! DK DK subroutines adapted from Deville, Fischer and Mund, High-order methods
-!! DK DK for incompressible fluid flow, Cambridge University Press (2002),
-!! DK DK pages 386 and 389 and Figure 8.3.1
- call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
+! subroutines adapted from Deville, Fischer and Mund, High-order methods
+! for incompressible fluid flow, Cambridge University Press (2002),
+! pages 386 and 389 and Figure 8.3.1
+! call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
+ do j=1,m2
+ do i=1,m1
+ C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
+ hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
+ hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
+ hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
+ hprime_xx(i,5)*B1_m1_m2_5points(5,j)
- do k = 1,NGLLX
- call mxm_m1_m1_5points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
- hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
+ C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
+ hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
+ hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
+ hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
+ hprime_xx(i,5)*B2_m1_m2_5points(5,j)
+
+ C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
+ hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
+ hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
+ hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
+ hprime_xx(i,5)*B3_m1_m2_5points(5,j)
+ enddo
enddo
- call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,hprime_xxT,tempx3,tempy3,tempz3)
+! call mxm_m1_m1_5points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
+! hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
+ do j=1,m1
+ do i=1,m1
+! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+ do k = 1,NGLLX
+ tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
+ dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
+ dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
+ dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
+ dummyx_loc(i,5,k)*hprime_xxT(5,j)
+ tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
+ dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
+ dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
+ dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
+ dummyy_loc(i,5,k)*hprime_xxT(5,j)
+
+ tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
+ dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
+ dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
+ dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
+ dummyz_loc(i,5,k)*hprime_xxT(5,j)
+ enddo
+ enddo
+ enddo
+
+! call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
+ do j=1,m1
+ do i=1,m2
+ C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+ A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+ A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+ A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+ A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+
+ C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+ A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+ A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+ A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+ A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+
+ C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+ A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+ A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+ A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+ A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+ enddo
+ enddo
+
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -182,18 +275,82 @@
enddo
enddo
-!! DK DK subroutines adapted from Deville, Fischer and Mund, High-order methods
-!! DK DK for incompressible fluid flow, Cambridge University Press (2002),
-!! DK DK pages 386 and 389 and Figure 8.3.1
- call mxm_m1_m2_5points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
+! subroutines adapted from Deville, Fischer and Mund, High-order methods
+! for incompressible fluid flow, Cambridge University Press (2002),
+! pages 386 and 389 and Figure 8.3.1
+! call mxm_m1_m2_5points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
+ do j=1,m2
+ do i=1,m1
+ E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
+ hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
+ hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
+ hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
+ hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
- do k = 1,NGLLX
- call mxm_m1_m1_5points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
- hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
+ E2_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_5points(1,j) + &
+ hprimewgll_xxT(i,2)*C2_m1_m2_5points(2,j) + &
+ hprimewgll_xxT(i,3)*C2_m1_m2_5points(3,j) + &
+ hprimewgll_xxT(i,4)*C2_m1_m2_5points(4,j) + &
+ hprimewgll_xxT(i,5)*C2_m1_m2_5points(5,j)
+
+ E3_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_5points(1,j) + &
+ hprimewgll_xxT(i,2)*C3_m1_m2_5points(2,j) + &
+ hprimewgll_xxT(i,3)*C3_m1_m2_5points(3,j) + &
+ hprimewgll_xxT(i,4)*C3_m1_m2_5points(4,j) + &
+ hprimewgll_xxT(i,5)*C3_m1_m2_5points(5,j)
+ enddo
enddo
- call mxm_m2_m1_5points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
+! call mxm_m1_m1_5points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
+! hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
+ do i=1,m1
+ do j=1,m1
+! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+ do k = 1,NGLLX
+ newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
+ tempx2(i,2,k)*hprimewgll_xx(2,j) + &
+ tempx2(i,3,k)*hprimewgll_xx(3,j) + &
+ tempx2(i,4,k)*hprimewgll_xx(4,j) + &
+ tempx2(i,5,k)*hprimewgll_xx(5,j)
+ newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
+ tempy2(i,2,k)*hprimewgll_xx(2,j) + &
+ tempy2(i,3,k)*hprimewgll_xx(3,j) + &
+ tempy2(i,4,k)*hprimewgll_xx(4,j) + &
+ tempy2(i,5,k)*hprimewgll_xx(5,j)
+
+ newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
+ tempz2(i,2,k)*hprimewgll_xx(2,j) + &
+ tempz2(i,3,k)*hprimewgll_xx(3,j) + &
+ tempz2(i,4,k)*hprimewgll_xx(4,j) + &
+ tempz2(i,5,k)*hprimewgll_xx(5,j)
+ enddo
+ enddo
+ enddo
+
+! call mxm_m2_m1_5points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
+ do j=1,m1
+ do i=1,m2
+ E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+ C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+ C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+ C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+ C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+
+ E2_mxm_m2_m1_5points(i,j) = C2_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+ C2_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+ C2_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+ C2_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+ C2_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+
+ E3_mxm_m2_m1_5points(i,j) = C3_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+ C3_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+ C3_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+ C3_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+ C3_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+ enddo
+ enddo
+
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -257,20 +414,23 @@
end subroutine compute_forces_with_Deville
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!! DK DK subroutines adapted from Deville, Fischer and Mund, High-order methods
-!! DK DK for incompressible fluid flow, Cambridge University Press (2002),
-!! DK DK pages 386 and 389 and Figure 8.3.1
+! subroutines adapted from Deville, Fischer and Mund, High-order methods
+! for incompressible fluid flow, Cambridge University Press (2002),
+! pages 386 and 389 and Figure 8.3.1
- subroutine mxm_m1_m2_5points(A,B1,B2,B3,C1,C2,C3)
+ subroutine old_mxm_m1_m2_5points(A,B1,B2,B3,C1,C2,C3)
implicit none
include "constants.h"
- real(kind=CUSTOM_REAL), dimension(m1,NGLLX) :: A
- real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1,B2,B3
- real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1,C2,C3
+ real(kind=4), dimension(m1,NGLLX) :: A
+ real(kind=4), dimension(NGLLX,m2) :: B1,B2,B3
+ real(kind=4), dimension(m1,m2) :: C1,C2,C3
integer :: i,j
@@ -298,19 +458,19 @@
enddo
enddo
- end subroutine mxm_m1_m2_5points
+ end subroutine old_mxm_m1_m2_5points
!---------
- subroutine mxm_m1_m1_5points(A1,A2,A3,B,C1,C2,C3)
+ subroutine old_mxm_m1_m1_5points(A1,A2,A3,B,C1,C2,C3)
implicit none
include "constants.h"
- real(kind=CUSTOM_REAL), dimension(m1,NGLLX) :: A1,A2,A3
- real(kind=CUSTOM_REAL), dimension(NGLLX,m1) :: B
- real(kind=CUSTOM_REAL), dimension(m1,m1) :: C1,C2,C3
+ real(kind=4), dimension(m1,NGLLX) :: A1,A2,A3
+ real(kind=4), dimension(NGLLX,m1) :: B
+ real(kind=4), dimension(m1,m1) :: C1,C2,C3
integer :: i,j
@@ -338,19 +498,19 @@
enddo
enddo
- end subroutine mxm_m1_m1_5points
+ end subroutine old_mxm_m1_m1_5points
!---------
- subroutine mxm_m2_m1_5points(A1,A2,A3,B,C1,C2,C3)
+ subroutine old_mxm_m2_m1_5points(A1,A2,A3,B,C1,C2,C3)
implicit none
include "constants.h"
- real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: A1,A2,A3
- real(kind=CUSTOM_REAL), dimension(NGLLX,m1) :: B
- real(kind=CUSTOM_REAL), dimension(m2,m1) :: C1,C2,C3
+ real(kind=4), dimension(m2,NGLLX) :: A1,A2,A3
+ real(kind=4), dimension(NGLLX,m1) :: B
+ real(kind=4), dimension(m2,m1) :: C1,C2,C3
integer :: i,j
@@ -378,5 +538,5 @@
enddo
enddo
- end subroutine mxm_m2_m1_5points
+ end subroutine old_mxm_m2_m1_5points
Modified: seismo/3D/SPECFEM3D_SESAME/trunk/flags.guess
===================================================================
--- seismo/3D/SPECFEM3D_SESAME/trunk/flags.guess 2009-05-25 16:37:44 UTC (rev 15045)
+++ seismo/3D/SPECFEM3D_SESAME/trunk/flags.guess 2009-05-25 16:57:09 UTC (rev 15046)
@@ -138,7 +138,7 @@
# for the C compiler when using -q64 for the Fortran compiler
#
if test x"$FLAGS_NO_CHECK" = x; then
- FLAGS_NO_CHECK="-O3 -qsave -qstrict -q64 -qtune=auto -qarch=auto -qcache=auto -qfree=f90 -qsuffix=f=f90 -qhalt=w -qlanglvl=2003pure -qflttrap=overflow:zerodivide:invalid:enable -qsigtrap -qinitauto=7FBFFFFF -Q -Q+rank,swap_all,mxm_m1_m2_5points,mxm_m1_m1_5points,mxm_m2_m1_5points"
+ FLAGS_NO_CHECK="-O3 -qsave -qstrict -q64 -qtune=auto -qarch=auto -qcache=auto -qfree=f90 -qsuffix=f=f90 -qhalt=w -qlanglvl=2003pure -qflttrap=overflow:zerodivide:invalid:enable -qsigtrap -qinitauto=7FBFFFFF -Q -Q+rank,swap_all"
# on MareNostrum at the Barcelona SuperComputing Center (Spain) use
# -qtune=ppc970 -qarch=ppc64v instead of -qtune=auto -qarch=auto
fi
More information about the CIG-COMMITS
mailing list