[cig-commits] r13803 - seismo/3D/SPECFEM3D_SESAME/trunk
dkomati1 at geodynamics.org
dkomati1 at geodynamics.org
Sat Dec 27 14:34:47 PST 2008
Author: dkomati1
Date: 2008-12-27 14:34:47 -0800 (Sat, 27 Dec 2008)
New Revision: 13803
Added:
seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces_no_Deville.f90
Modified:
seismo/3D/SPECFEM3D_SESAME/trunk/Makefile.in
seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces.f90
seismo/3D/SPECFEM3D_SESAME/trunk/constants.h.in
seismo/3D/SPECFEM3D_SESAME/trunk/flags.guess
seismo/3D/SPECFEM3D_SESAME/trunk/specfem3D.f90
Log:
switched to optimized matrix multiplication routines from Deville et al. (2002) to improve performance
Modified: seismo/3D/SPECFEM3D_SESAME/trunk/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D_SESAME/trunk/Makefile.in 2008-12-27 21:20:45 UTC (rev 13802)
+++ seismo/3D/SPECFEM3D_SESAME/trunk/Makefile.in 2008-12-27 22:34:47 UTC (rev 13803)
@@ -487,7 +487,7 @@
${FCCOMPILE_CHECK} -c -o $O/compute_boundary_kernel.o compute_boundary_kernel.f90
$O/compute_forces.o: constants.h compute_forces.f90
- ${FCCOMPILE_CHECK} -c -o $O/compute_forces.o compute_forces.f90
+ ${FCCOMPILE_NO_CHECK} -c -o $O/compute_forces.o compute_forces.f90
$O/combine_vol_data.o: constants.h combine_vol_data.f90
${FCCOMPILE_CHECK} -c -o $O/combine_vol_data.o combine_vol_data.f90
Modified: seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces.f90
===================================================================
--- seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces.f90 2008-12-27 21:20:45 UTC (rev 13802)
+++ seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces.f90 2008-12-27 22:34:47 UTC (rev 13803)
@@ -24,10 +24,9 @@
!=====================================================================
subroutine compute_forces(NSPEC_AB,NGLOB_AB,displ,accel,xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
kappastore,mustore,jacobian,ibool,ispec_is_inner,phase_is_inner, &
- NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,xi_source,eta_source,gamma_source,nu_source,hdur,dt &
- )
+ NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,xi_source,eta_source,gamma_source,nu_source,hdur,dt)
implicit none
@@ -46,13 +45,14 @@
kappastore,mustore,jacobian
! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
- real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+ newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
+
! communication overlap
logical, dimension(NSPEC_AB) :: ispec_is_inner
logical :: phase_is_inner
@@ -69,7 +69,7 @@
tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
integer ispec,iglob
- integer i,j,k,l
+ integer i,j,k
real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
@@ -79,13 +79,8 @@
real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
- real(kind=CUSTOM_REAL) hp1,hp2,hp3
real(kind=CUSTOM_REAL) fac1,fac2,fac3
- real(kind=CUSTOM_REAL) tempx1l,tempx2l,tempx3l
- real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
- real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
-
real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
real(kind=CUSTOM_REAL) kappal
@@ -100,43 +95,30 @@
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+ dummyx_loc(i,j,k) = displ(1,iglob)
+ dummyy_loc(i,j,k) = displ(2,iglob)
+ dummyz_loc(i,j,k) = displ(3,iglob)
+ enddo
+ enddo
+ enddo
- tempx1l = 0.
- tempx2l = 0.
- tempx3l = 0.
+!! DK DK subroutines adapted from Deville, Fischer and Mund, High-order methods
+!! DK DK for incompressible fluid flow, Cambridge University Press (2002),
+!! DK DK pages 386 and 389 and Figure 8.3.1
+ call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
- tempy1l = 0.
- tempy2l = 0.
- tempy3l = 0.
+ do k = 1,NGLLX
+ call mxm_m1_m1_5points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
+ hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
+ enddo
- tempz1l = 0.
- tempz2l = 0.
- tempz3l = 0.
+ call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,hprime_xxT,tempx3,tempy3,tempz3)
- do l=1,NGLLX
- hp1 = hprime_xx(i,l)
- iglob = ibool(l,j,k,ispec)
- tempx1l = tempx1l + displ(1,iglob)*hp1
- tempy1l = tempy1l + displ(2,iglob)*hp1
- tempz1l = tempz1l + displ(3,iglob)*hp1
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLY
- hp2 = hprime_yy(j,l)
- iglob = ibool(i,l,k,ispec)
- tempx2l = tempx2l + displ(1,iglob)*hp2
- tempy2l = tempy2l + displ(2,iglob)*hp2
- tempz2l = tempz2l + displ(3,iglob)*hp2
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
-
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLZ
- hp3 = hprime_zz(k,l)
- iglob = ibool(i,j,l,ispec)
- tempx3l = tempx3l + displ(1,iglob)*hp3
- tempy3l = tempy3l + displ(2,iglob)*hp3
- tempz3l = tempz3l + displ(3,iglob)*hp3
- enddo
-
! get derivatives of ux, uy and uz with respect to x, y and z
xixl = xix(i,j,k,ispec)
xiyl = xiy(i,j,k,ispec)
@@ -149,17 +131,17 @@
gammazl = gammaz(i,j,k,ispec)
jacobianl = jacobian(i,j,k,ispec)
- duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
- duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
- duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+ duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
+ duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
+ duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
- duydxl = xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l
- duydyl = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l
- duydzl = xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l
+ duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
+ duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
+ duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
- duzdxl = xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l
- duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l
- duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l
+ duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
+ duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
+ duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
! precompute some sums to save CPU time
duxdxl_plus_duydyl = duxdxl + duydyl
@@ -201,60 +183,37 @@
enddo
enddo
+!! DK DK subroutines adapted from Deville, Fischer and Mund, High-order methods
+!! DK DK for incompressible fluid flow, Cambridge University Press (2002),
+!! DK DK pages 386 and 389 and Figure 8.3.1
+ call mxm_m1_m2_5points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
+
+ do k = 1,NGLLX
+ call mxm_m1_m1_5points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
+ hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
+ enddo
+
+ call mxm_m2_m1_5points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
+
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- tempx1l = 0.
- tempy1l = 0.
- tempz1l = 0.
-
- tempx2l = 0.
- tempy2l = 0.
- tempz2l = 0.
-
- tempx3l = 0.
- tempy3l = 0.
- tempz3l = 0.
-
- do l=1,NGLLX
- fac1 = hprimewgll_xx(l,i)
- tempx1l = tempx1l + tempx1(l,j,k)*fac1
- tempy1l = tempy1l + tempy1(l,j,k)*fac1
- tempz1l = tempz1l + tempz1(l,j,k)*fac1
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
-
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLY
- fac2 = hprimewgll_yy(l,j)
- tempx2l = tempx2l + tempx2(i,l,k)*fac2
- tempy2l = tempy2l + tempy2(i,l,k)*fac2
- tempz2l = tempz2l + tempz2(i,l,k)*fac2
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
-
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLZ
- fac3 = hprimewgll_zz(l,k)
- tempx3l = tempx3l + tempx3(i,j,l)*fac3
- tempy3l = tempy3l + tempy3(i,j,l)*fac3
- tempz3l = tempz3l + tempz3(i,j,l)*fac3
- enddo
-
fac1 = wgllwgll_yz(j,k)
fac2 = wgllwgll_xz(i,k)
fac3 = wgllwgll_xy(i,j)
-! sum contributions from each element to the global mesh
-
+! sum contributions from each element to the global mesh using indirect addressing
iglob = ibool(i,j,k,ispec)
+ accel(1,iglob) = accel(1,iglob) - (fac1*newtempx1(i,j,k) + fac2*newtempx2(i,j,k) + fac3*newtempx3(i,j,k))
+ accel(2,iglob) = accel(2,iglob) - (fac1*newtempy1(i,j,k) + fac2*newtempy2(i,j,k) + fac3*newtempy3(i,j,k))
+ accel(3,iglob) = accel(3,iglob) - (fac1*newtempz1(i,j,k) + fac2*newtempz2(i,j,k) + fac3*newtempz3(i,j,k))
- accel(1,iglob) = accel(1,iglob) - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l)
- accel(2,iglob) = accel(2,iglob) - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l)
- accel(3,iglob) = accel(3,iglob) - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l)
-
enddo
enddo
enddo
- endif ! if (ispec_is_inner(ispec) == phase_is_inner)
+ endif ! if (ispec_is_inner(ispec) .eqv. phase_is_inner)
enddo ! spectral element loop
@@ -274,20 +233,19 @@
ispec_selected_source(isource))
f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
t0 = 1.2d0/f0
-if (it == 1 .and. myrank == 0) then
-print *,'using a source of dominant frequency ',f0
-print *,'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
-print *,'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
-endif
+ if (it == 1 .and. myrank == 0) then
+ print *,'using a source of dominant frequency ',f0
+ print *,'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+ print *,'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+ endif
-
! we use nu_source(:,3) here because we want a source normal to the surface.
! This is the expression of a Ricker; should be changed according maybe to the Par_file.
!accel(:,iglob) = accel(:,iglob) + &
! sngl(nu_source(:,3,isource) * 10000000.d0 * (1.d0-2.d0*PI*PI*f0*f0*(dble(it-1)*DT-t0)*(dble(it-1)*DT-t0)) * &
! exp(-PI*PI*f0*f0*(dble(it-1)*DT-t0)*(dble(it-1)*DT-t0)))
-accel(:,iglob) = accel(:,iglob) + &
+ accel(:,iglob) = accel(:,iglob) + &
sngl(nu_source(:,3,isource) * 1.d10 * (1.d0-2.d0*PI*PI*f0*f0*(dble(it-1)*DT-t0)*(dble(it-1)*DT-t0)) * &
exp(-PI*PI*f0*f0*(dble(it-1)*DT-t0)*(dble(it-1)*DT-t0)))
@@ -300,3 +258,126 @@
end subroutine compute_forces
+
+!! DK DK subroutines adapted from Deville, Fischer and Mund, High-order methods
+!! DK DK for incompressible fluid flow, Cambridge University Press (2002),
+!! DK DK pages 386 and 389 and Figure 8.3.1
+
+ subroutine mxm_m1_m2_5points(A,B1,B2,B3,C1,C2,C3)
+
+ implicit none
+
+ include "constants.h"
+
+ real(kind=CUSTOM_REAL), dimension(m1,NGLLX) :: A
+ real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1,B2,B3
+ real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1,C2,C3
+
+ integer :: i,j
+
+ do j=1,m2
+ do i=1,m1
+
+ C1(i,j) = A(i,1)*B1(1,j) + &
+ A(i,2)*B1(2,j) + &
+ A(i,3)*B1(3,j) + &
+ A(i,4)*B1(4,j) + &
+ A(i,5)*B1(5,j)
+
+ C2(i,j) = A(i,1)*B2(1,j) + &
+ A(i,2)*B2(2,j) + &
+ A(i,3)*B2(3,j) + &
+ A(i,4)*B2(4,j) + &
+ A(i,5)*B2(5,j)
+
+ C3(i,j) = A(i,1)*B3(1,j) + &
+ A(i,2)*B3(2,j) + &
+ A(i,3)*B3(3,j) + &
+ A(i,4)*B3(4,j) + &
+ A(i,5)*B3(5,j)
+
+ enddo
+ enddo
+
+ end subroutine mxm_m1_m2_5points
+
+!---------
+
+ subroutine mxm_m1_m1_5points(A1,A2,A3,B,C1,C2,C3)
+
+ implicit none
+
+ include "constants.h"
+
+ real(kind=CUSTOM_REAL), dimension(m1,NGLLX) :: A1,A2,A3
+ real(kind=CUSTOM_REAL), dimension(NGLLX,m1) :: B
+ real(kind=CUSTOM_REAL), dimension(m1,m1) :: C1,C2,C3
+
+ integer :: i,j
+
+ do j=1,m1
+ do i=1,m1
+
+ C1(i,j) = A1(i,1)*B(1,j) + &
+ A1(i,2)*B(2,j) + &
+ A1(i,3)*B(3,j) + &
+ A1(i,4)*B(4,j) + &
+ A1(i,5)*B(5,j)
+
+ C2(i,j) = A2(i,1)*B(1,j) + &
+ A2(i,2)*B(2,j) + &
+ A2(i,3)*B(3,j) + &
+ A2(i,4)*B(4,j) + &
+ A2(i,5)*B(5,j)
+
+ C3(i,j) = A3(i,1)*B(1,j) + &
+ A3(i,2)*B(2,j) + &
+ A3(i,3)*B(3,j) + &
+ A3(i,4)*B(4,j) + &
+ A3(i,5)*B(5,j)
+
+ enddo
+ enddo
+
+ end subroutine mxm_m1_m1_5points
+
+!---------
+
+ subroutine mxm_m2_m1_5points(A1,A2,A3,B,C1,C2,C3)
+
+ implicit none
+
+ include "constants.h"
+
+ real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: A1,A2,A3
+ real(kind=CUSTOM_REAL), dimension(NGLLX,m1) :: B
+ real(kind=CUSTOM_REAL), dimension(m2,m1) :: C1,C2,C3
+
+ integer :: i,j
+
+ do j=1,m1
+ do i=1,m2
+
+ C1(i,j) = A1(i,1)*B(1,j) + &
+ A1(i,2)*B(2,j) + &
+ A1(i,3)*B(3,j) + &
+ A1(i,4)*B(4,j) + &
+ A1(i,5)*B(5,j)
+
+ C2(i,j) = A2(i,1)*B(1,j) + &
+ A2(i,2)*B(2,j) + &
+ A2(i,3)*B(3,j) + &
+ A2(i,4)*B(4,j) + &
+ A2(i,5)*B(5,j)
+
+ C3(i,j) = A3(i,1)*B(1,j) + &
+ A3(i,2)*B(2,j) + &
+ A3(i,3)*B(3,j) + &
+ A3(i,4)*B(4,j) + &
+ A3(i,5)*B(5,j)
+
+ enddo
+ enddo
+
+ end subroutine mxm_m2_m1_5points
+
Added: seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces_no_Deville.f90
===================================================================
--- seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces_no_Deville.f90 (rev 0)
+++ seismo/3D/SPECFEM3D_SESAME/trunk/compute_forces_no_Deville.f90 2008-12-27 22:34:47 UTC (rev 13803)
@@ -0,0 +1,300 @@
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 1 . 4
+! ---------------------------------------
+!
+! Dimitri Komatitsch and Jeroen Tromp
+! Seismological Laboratory - California Institute of Technology
+! (c) California Institute of Technology September 2006
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+subroutine compute_forces(NSPEC_AB,NGLOB_AB,displ,accel,xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool,ispec_is_inner,phase_is_inner, &
+ NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,xi_source,eta_source,gamma_source,nu_source,hdur,dt)
+
+ implicit none
+
+ include "constants.h"
+
+ integer :: NSPEC_AB,NGLOB_AB
+
+! displacement and acceleration
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,accel
+
+! arrays with mesh parameters per slice
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+ kappastore,mustore,jacobian
+
+! array with derivatives of Lagrange polynomials and precalculated products
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+ real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+ real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+ real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+! communication overlap
+ logical, dimension(NSPEC_AB) :: ispec_is_inner
+ logical :: phase_is_inner
+
+! source
+ integer :: NSOURCES,myrank,it
+ integer, dimension(NSOURCES) :: islice_selected_source,ispec_selected_source
+ double precision, dimension(NSOURCES) :: xi_source,eta_source,gamma_source
+ double precision, dimension(3,3,NSOURCES) :: nu_source
+ double precision, dimension(NSOURCES) :: hdur
+ double precision :: dt
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+ tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+
+ integer ispec,iglob
+ integer i,j,k,l
+
+ real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+ real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
+
+ real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
+ real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+
+ real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
+
+ real(kind=CUSTOM_REAL) hp1,hp2,hp3
+ real(kind=CUSTOM_REAL) fac1,fac2,fac3
+
+ real(kind=CUSTOM_REAL) tempx1l,tempx2l,tempx3l
+ real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
+ real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
+
+ real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
+ real(kind=CUSTOM_REAL) kappal
+
+ integer :: isource
+ double precision :: t0,f0
+
+
+ do ispec = 1,NSPEC_AB
+
+ if (ispec_is_inner(ispec) .eqv. phase_is_inner) then
+
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+
+ tempx1l = 0.
+ tempx2l = 0.
+ tempx3l = 0.
+
+ tempy1l = 0.
+ tempy2l = 0.
+ tempy3l = 0.
+
+ tempz1l = 0.
+ tempz2l = 0.
+ tempz3l = 0.
+
+ do l=1,NGLLX
+ hp1 = hprime_xx(i,l)
+ iglob = ibool(l,j,k,ispec)
+ tempx1l = tempx1l + displ(1,iglob)*hp1
+ tempy1l = tempy1l + displ(2,iglob)*hp1
+ tempz1l = tempz1l + displ(3,iglob)*hp1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLY
+ hp2 = hprime_yy(j,l)
+ iglob = ibool(i,l,k,ispec)
+ tempx2l = tempx2l + displ(1,iglob)*hp2
+ tempy2l = tempy2l + displ(2,iglob)*hp2
+ tempz2l = tempz2l + displ(3,iglob)*hp2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLZ
+ hp3 = hprime_zz(k,l)
+ iglob = ibool(i,j,l,ispec)
+ tempx3l = tempx3l + displ(1,iglob)*hp3
+ tempy3l = tempy3l + displ(2,iglob)*hp3
+ tempz3l = tempz3l + displ(3,iglob)*hp3
+ enddo
+
+! get derivatives of ux, uy and uz with respect to x, y and z
+ xixl = xix(i,j,k,ispec)
+ xiyl = xiy(i,j,k,ispec)
+ xizl = xiz(i,j,k,ispec)
+ etaxl = etax(i,j,k,ispec)
+ etayl = etay(i,j,k,ispec)
+ etazl = etaz(i,j,k,ispec)
+ gammaxl = gammax(i,j,k,ispec)
+ gammayl = gammay(i,j,k,ispec)
+ gammazl = gammaz(i,j,k,ispec)
+ jacobianl = jacobian(i,j,k,ispec)
+
+ duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+ duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+ duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+ duydxl = xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l
+ duydyl = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l
+ duydzl = xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l
+
+ duzdxl = xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l
+ duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l
+ duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l
+
+! precompute some sums to save CPU time
+ duxdxl_plus_duydyl = duxdxl + duydyl
+ duxdxl_plus_duzdzl = duxdxl + duzdzl
+ duydyl_plus_duzdzl = duydyl + duzdzl
+ duxdyl_plus_duydxl = duxdyl + duydxl
+ duzdxl_plus_duxdzl = duzdxl + duxdzl
+ duzdyl_plus_duydzl = duzdyl + duydzl
+
+ kappal = kappastore(i,j,k,ispec)
+ mul = mustore(i,j,k,ispec)
+
+ lambdalplus2mul = kappal + FOUR_THIRDS * mul
+ lambdal = lambdalplus2mul - 2.*mul
+
+! compute stress sigma
+ sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
+ sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
+ sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
+
+ sigma_xy = mul*duxdyl_plus_duydxl
+ sigma_xz = mul*duzdxl_plus_duxdzl
+ sigma_yz = mul*duzdyl_plus_duydzl
+
+! form dot product with test vector, symmetric form
+ tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl)
+ tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_yz*xizl)
+ tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl)
+
+ tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_xy*etayl + sigma_xz*etazl)
+ tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_yz*etazl)
+ tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl)
+
+ tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_xy*gammayl + sigma_xz*gammazl)
+ tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_yz*gammazl)
+ tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl)
+
+ enddo
+ enddo
+ enddo
+
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+
+ tempx1l = 0.
+ tempy1l = 0.
+ tempz1l = 0.
+
+ tempx2l = 0.
+ tempy2l = 0.
+ tempz2l = 0.
+
+ tempx3l = 0.
+ tempy3l = 0.
+ tempz3l = 0.
+
+ do l=1,NGLLX
+ fac1 = hprimewgll_xx(l,i)
+ tempx1l = tempx1l + tempx1(l,j,k)*fac1
+ tempy1l = tempy1l + tempy1(l,j,k)*fac1
+ tempz1l = tempz1l + tempz1(l,j,k)*fac1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLY
+ fac2 = hprimewgll_yy(l,j)
+ tempx2l = tempx2l + tempx2(i,l,k)*fac2
+ tempy2l = tempy2l + tempy2(i,l,k)*fac2
+ tempz2l = tempz2l + tempz2(i,l,k)*fac2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLZ
+ fac3 = hprimewgll_zz(l,k)
+ tempx3l = tempx3l + tempx3(i,j,l)*fac3
+ tempy3l = tempy3l + tempy3(i,j,l)*fac3
+ tempz3l = tempz3l + tempz3(i,j,l)*fac3
+ enddo
+
+ fac1 = wgllwgll_yz(j,k)
+ fac2 = wgllwgll_xz(i,k)
+ fac3 = wgllwgll_xy(i,j)
+
+! sum contributions from each element to the global mesh
+
+ iglob = ibool(i,j,k,ispec)
+
+ accel(1,iglob) = accel(1,iglob) - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l)
+ accel(2,iglob) = accel(2,iglob) - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l)
+ accel(3,iglob) = accel(3,iglob) - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l)
+
+ enddo
+ enddo
+ enddo
+
+ endif ! if (ispec_is_inner(ispec) .eqv. phase_is_inner)
+
+ enddo ! spectral element loop
+
+! adding source
+ do isource = 1,NSOURCES
+
+ if (ispec_is_inner(ispec_selected_source(isource)) .eqv. phase_is_inner) then
+
+ if(USE_FORCE_POINT_SOURCE) then
+
+! add the source (only if this proc carries the source)
+ if(myrank == islice_selected_source(isource)) then
+
+ iglob = ibool(nint(xi_source(isource)), &
+ nint(eta_source(isource)), &
+ nint(gamma_source(isource)), &
+ ispec_selected_source(isource))
+ f0 = hdur(isource) !! using hdur as a FREQUENCY just to avoid changing CMTSOLUTION file format
+ t0 = 1.2d0/f0
+
+ if (it == 1 .and. myrank == 0) then
+ print *,'using a source of dominant frequency ',f0
+ print *,'lambda_S at dominant frequency = ',3000./sqrt(3.)/f0
+ print *,'lambda_S at highest significant frequency = ',3000./sqrt(3.)/(2.5*f0)
+ endif
+
+ ! we use nu_source(:,3) here because we want a source normal to the surface.
+ ! This is the expression of a Ricker; should be changed according maybe to the Par_file.
+ !accel(:,iglob) = accel(:,iglob) + &
+ ! sngl(nu_source(:,3,isource) * 10000000.d0 * (1.d0-2.d0*PI*PI*f0*f0*(dble(it-1)*DT-t0)*(dble(it-1)*DT-t0)) * &
+ ! exp(-PI*PI*f0*f0*(dble(it-1)*DT-t0)*(dble(it-1)*DT-t0)))
+ accel(:,iglob) = accel(:,iglob) + &
+ sngl(nu_source(:,3,isource) * 1.d10 * (1.d0-2.d0*PI*PI*f0*f0*(dble(it-1)*DT-t0)*(dble(it-1)*DT-t0)) * &
+ exp(-PI*PI*f0*f0*(dble(it-1)*DT-t0)*(dble(it-1)*DT-t0)))
+
+ endif
+ endif
+
+ endif
+
+ enddo
+
+end subroutine compute_forces
+
Modified: seismo/3D/SPECFEM3D_SESAME/trunk/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D_SESAME/trunk/constants.h.in 2008-12-27 21:20:45 UTC (rev 13802)
+++ seismo/3D/SPECFEM3D_SESAME/trunk/constants.h.in 2008-12-27 22:34:47 UTC (rev 13803)
@@ -52,6 +52,9 @@
integer, parameter :: NGLLY = NGLLX
integer, parameter :: NGLLZ = NGLLX
+! for optimized routines by Deville et al. (2002)
+ integer, parameter :: m1 = NGLLX, m2 = NGLLX * NGLLY
+
! input, output and main MPI I/O files
integer, parameter :: ISTANDARD_OUTPUT = 6
integer, parameter :: IIN = 40,IOUT = 41
Modified: seismo/3D/SPECFEM3D_SESAME/trunk/flags.guess
===================================================================
--- seismo/3D/SPECFEM3D_SESAME/trunk/flags.guess 2008-12-27 21:20:45 UTC (rev 13802)
+++ seismo/3D/SPECFEM3D_SESAME/trunk/flags.guess 2008-12-27 22:34:47 UTC (rev 13803)
@@ -29,7 +29,9 @@
FLAGS_CHECK="-O3 -vec-report0 -e03 -std03 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage -check nobounds -align sequence -assume byterecl -fpe0 -ftz -traceback -ftrapuv" # -mcmodel=medium
fi
if test x"$FLAGS_NO_CHECK" = x; then
- # standard options (leave option -ftz, which is *critical* for performance)
+# standard options (leave option -ftz, which is *critical* for performance)
+# add -Winline to get information about routines that are inlined
+# add -vec-report3 to get information about loops that are vectorized or not
FLAGS_NO_CHECK="-O3 -xP -vec-report0 -e03 -std03 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage -check nobounds -align sequence -assume byterecl -fpe3 -ftz" # -mcmodel=medium
fi
;;
Modified: seismo/3D/SPECFEM3D_SESAME/trunk/specfem3D.f90
===================================================================
--- seismo/3D/SPECFEM3D_SESAME/trunk/specfem3D.f90 2008-12-27 21:20:45 UTC (rev 13802)
+++ seismo/3D/SPECFEM3D_SESAME/trunk/specfem3D.f90 2008-12-27 22:34:47 UTC (rev 13803)
@@ -288,7 +288,7 @@
double precision, dimension(NGLLZ) :: zigll,wzgll
! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
@@ -480,6 +480,10 @@
! and decomposed with METIS or SCOTCH)
if (.not. USE_EXTERNAL_MESH) stop 'SPECFEM3D_SESAME is for external meshes only'
+! check that optimized routines from Deville et al. (2002) can be used
+ if(NGLLX /= 5 .or. NGLLY /= 5 .or. NGLLZ /= 5) &
+ stop 'optimized routines from Deville et al. (2002) such as mxm_m1_m2_5points can only be used if NGLL = 5'
+
! info about external mesh simulation
! nlegoff -- should be put in compute_parameters and read_parameter_file for clarity
NPROC = sizeprocs
@@ -655,6 +659,14 @@
hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz)
+! define transpose of derivation matrix
+ do j = 1,NGLLY
+ do i = 1,NGLLX
+ hprime_xxT(j,i) = hprime_xx(i,j)
+ hprimewgll_xxT(j,i) = hprimewgll_xx(i,j)
+ enddo
+ enddo
+
! allocate 1-D Lagrange interpolators and derivatives
allocate(hxir(NGLLX))
allocate(hpxir(NGLLX))
@@ -1784,7 +1796,7 @@
! assemble all the contributions between slices using MPI
call compute_forces(NSPEC_AB,NGLOB_AB,displ,accel,xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
kappastore,mustore,jacobian,ibool,ispec_is_inner_ext_mesh,.false., &
NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,xi_source,eta_source,gamma_source,nu_source,hdur,dt)
@@ -1795,7 +1807,7 @@
request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
call compute_forces(NSPEC_AB,NGLOB_AB,displ,accel,xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
kappastore,mustore,jacobian,ibool,ispec_is_inner_ext_mesh,.true., &
NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,xi_source,eta_source,gamma_source,nu_source,hdur,dt)
More information about the CIG-COMMITS
mailing list