[cig-commits] r21587 - in seismo/3D/SPECFEM3D/trunk: . src/specfem3D

dkomati1 at geodynamics.org dkomati1 at geodynamics.org
Tue Mar 19 18:37:35 PDT 2013


Author: dkomati1
Date: 2013-03-19 18:37:35 -0700 (Tue, 19 Mar 2013)
New Revision: 21587

Added:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_Dev.F90
Removed:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_Dev.f90
Modified:
   seismo/3D/SPECFEM3D/trunk/flags.guess
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/setup_GLL_points.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90
Log:
fully vectorized compute_forces_acoustic_Dev.F90


Modified: seismo/3D/SPECFEM3D/trunk/flags.guess
===================================================================
--- seismo/3D/SPECFEM3D/trunk/flags.guess	2013-03-19 23:24:19 UTC (rev 21586)
+++ seismo/3D/SPECFEM3D/trunk/flags.guess	2013-03-20 01:37:35 UTC (rev 21587)
@@ -18,7 +18,7 @@
         # Cray Fortran
         #
         if test x"$FLAGS_CHECK" = x; then
-            FLAGS_CHECK="-O3 -Onoaggress -Oipa0 -hfp2 -Ovector3 -Oscalar3 -Ocache2 -Ounroll2 -Ofusion2" # turn on optimization; -Oaggress -Oipa4 would make it even more aggressive
+            FLAGS_CHECK="-O3 -Onoaggress -Oipa0 -hfp2 -Ovector3 -Oscalar3 -Ocache2 -Ounroll2 -Ofusion2 -DFORCE_VECTORIZATION" # turn on optimization; -Oaggress -Oipa4 would make it even more aggressive
         # -eC -eD -ec -en -eI -ea -g -G0  # turn on full debugging and range checking
         fi
         ;;
@@ -27,7 +27,7 @@
         # Portland PGI
         #
         if test x"$FLAGS_CHECK" = x; then
-            FLAGS_CHECK="-fast -Mnobounds -Minline -Mneginfo -Mdclchk -Knoieee -Minform=warn -Mdaz -Mflushz" # -mcmodel=medium
+            FLAGS_CHECK="-fast -Mnobounds -Minline -Mneginfo -Mdclchk -Knoieee -Minform=warn -Mdaz -Mflushz -DFORCE_VECTORIZATION" # -mcmodel=medium
         # -Mbounds
         # -fastsse -tp amd64e -Msmart
         fi
@@ -42,20 +42,22 @@
 # I/O throughput lingers at 2.5 MB/s, with it it can increase to ~44 MB/s
 # However it does not make much of a difference on NFS mounted volumes or with SFS 3.1.1 / Lustre 1.6.7.1 
         if test x"$FLAGS_CHECK" = x; then
-            FLAGS_CHECK="-O3 -check nobounds -xHost -ftz -fpe0 -assume buffered_io -assume byterecl -align sequence -vec-report0 -std03 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage" # -mcmodel=medium -shared-intel
+            FLAGS_CHECK="-O3 -check nobounds -DFORCE_VECTORIZATION -xHost -ftz -fpe0 -assume buffered_io -assume byterecl -align sequence -vec-report0 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage" # -mcmodel=medium -shared-intel
         fi
         # useful for debugging...
-        # for debugging: change -O3 -check nobounds to      -check all -debug -g -O0 -fp-stack-check -traceback -ftrapuv
+        # for debugging: change "-O3 -check nobounds -DFORCE_VECTORIZATION" to  "-check all -debug -g -O0 -fp-stack-check -traceback -ftrapuv"
         #
+        # for developers, to check Fortran2003 standard conformity add "-std03" and ignore all warnings about the "DIR IVDEP" directive, which we use purposely
+        #
         ;;
     gfortran|*/gfortran|f95|*/f95)
         #
         # GNU gfortran
         #
         if test x"$FLAGS_CHECK" = x; then
-            FLAGS_CHECK="-std=f2003 -fimplicit-none -frange-check -O2 -fmax-errors=10 -pedantic -pedantic-errors -Waliasing -Wampersand -Wcharacter-truncation -Wline-truncation -Wsurprising -Wno-tabs -Wunderflow -ffpe-trap=invalid,zero,overflow " # -mcmodel=medium
+            FLAGS_CHECK="-std=f2003 -fimplicit-none -frange-check -O2 -DFORCE_VECTORIZATION -fmax-errors=10 -pedantic -pedantic-errors -Waliasing -Wampersand -Wcharacter-truncation -Wline-truncation -Wsurprising -Wno-tabs -Wunderflow -ffpe-trap=invalid,zero,overflow " # -mcmodel=medium
         fi
-        # useful for debugging... -fbacktrace -fbounds-check
+        # useful for debugging... -fbacktrace -fbounds-check, and suppress -DFORCE_VECTORIZATION
         ;;
     g95|*/g95)
         #
@@ -139,7 +141,7 @@
         #
         if test x"$FLAGS_CHECK" = x; then
 # deleted -qxflag=dvz because it requires handler function __xl_dzx and thus linking will fail 
-            FLAGS_CHECK="-O3 -qsave -qstrict -q64 -qtune=auto -qarch=auto -qcache=auto -qfree=f90 -qsuffix=f=f90 -qhalt=w -qlanglvl=2003std -qsuppress=1518-317 -Q -Q+rank,swap_all -Wl,-relax -qunroll=yes"
+            FLAGS_CHECK="-O3 -qsave -qstrict -q64 -qtune=auto -qarch=auto -qcache=auto -qfree=f90 -qsuffix=f=f90 -qhalt=w -qlanglvl=2003std -qsuppress=1518-317 -Q -Q+rank,swap_all -Wl,-relax -WF,-DFORCE_VECTORIZATION"
         # on MareNostrum at the Barcelona SuperComputing Center (Spain) one can use
         # -qtune=ppc970 -qarch=ppc64v instead of -qtune=auto -qarch=auto
         # on "Babel" IBM BlueGene at IDRIS (France) use:

Copied: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_Dev.F90 (from rev 21586, seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_Dev.f90)
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_Dev.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_Dev.F90	2013-03-20 01:37:35 UTC (rev 21587)
@@ -0,0 +1,302 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 1
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+!                             July 2012
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! for acoustic solver
+
+  subroutine compute_forces_acoustic_Dev(iphase,NSPEC_AB,NGLOB_AB, &
+                        potential_acoustic,potential_dot_dot_acoustic, &
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                        wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D, &
+                        rhostore,jacobian,ibool, &
+                        num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
+                        phase_ispec_inner_acoustic)
+
+! computes forces for acoustic elements
+!
+! note that pressure is defined as:
+!     p = - Chi_dot_dot
+!
+  use specfem_par,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,TINYVAL_SNGL,ABSORB_USE_PML, &
+              ABSORBING_CONDITIONS,PML_CONDITIONS,m1,m2,NGLLCUBE
+
+  implicit none
+
+  integer :: NSPEC_AB,NGLOB_AB
+
+  ! acoustic potentials
+  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: &
+        potential_acoustic,potential_dot_dot_acoustic
+
+  ! arrays with mesh parameters per slice
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+        rhostore,jacobian
+
+  ! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprimewgll_xx,hprimewgll_xxT
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D
+
+  integer :: iphase
+  integer :: num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic
+  integer, dimension(num_phase_ispec_acoustic,2) :: phase_ispec_inner_acoustic
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: newtempx1,newtempx2,newtempx3
+
+  real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
+  real(kind=CUSTOM_REAL) :: rho_invl
+
+  integer :: ispec,iglob,i,j,k,ispec_p,num_elements
+
+  ! manually inline the calls to the Deville et al. (2002) routines
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: chi_elem
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points
+  real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points
+  real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_5points
+
+  equivalence(chi_elem,B1_m1_m2_5points)
+  equivalence(tempx1,C1_m1_m2_5points)
+  equivalence(newtempx1,E1_m1_m2_5points)
+
+  real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: A1_mxm_m2_m1_5points
+  real(kind=CUSTOM_REAL), dimension(m2,m1) :: C1_mxm_m2_m1_5points
+  real(kind=CUSTOM_REAL), dimension(m2,m1) :: E1_mxm_m2_m1_5points
+
+  equivalence(chi_elem,A1_mxm_m2_m1_5points)
+  equivalence(tempx3,C1_mxm_m2_m1_5points)
+  equivalence(newtempx3,E1_mxm_m2_m1_5points)
+
+#ifdef FORCE_VECTORIZATION
+  integer :: ijk
+#endif
+
+  if( iphase == 1 ) then
+    num_elements = nspec_outer_acoustic
+  else
+    num_elements = nspec_inner_acoustic
+  endif
+
+  ! loop over spectral elements
+  do ispec_p = 1,num_elements
+
+    ispec = phase_ispec_inner_acoustic(ispec_p,iphase)
+
+    ! gets values for element
+#ifndef FORCE_VECTORIZATION
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+          chi_elem(i,j,k) = potential_acoustic(ibool(i,j,k,ispec))
+        enddo
+      enddo
+    enddo
+#else
+! this will (purposely) give out-of-bound array accesses if run through range checking,
+! thus use only for production runs with no bound checking
+    do ijk = 1,NGLLCUBE
+      chi_elem(ijk,1,1) = potential_acoustic(ibool(ijk,1,1,ispec))
+    enddo
+#endif
+
+    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+    ! for incompressible fluid flow, Cambridge University Press (2002),
+    ! pages 386 and 389 and Figure 8.3.1
+    do j=1,m2
+      do i=1,m1
+        C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
+                                hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
+                                hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
+                                hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
+                                hprime_xx(i,5)*B1_m1_m2_5points(5,j)
+      enddo
+    enddo
+
+    do k = 1,NGLLX
+      do j=1,m1
+        do i=1,m1
+          tempx2(i,j,k) = chi_elem(i,1,k)*hprime_xxT(1,j) + &
+                          chi_elem(i,2,k)*hprime_xxT(2,j) + &
+                          chi_elem(i,3,k)*hprime_xxT(3,j) + &
+                          chi_elem(i,4,k)*hprime_xxT(4,j) + &
+                          chi_elem(i,5,k)*hprime_xxT(5,j)
+        enddo
+      enddo
+    enddo
+
+    do j=1,m1
+      do i=1,m2
+        C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+                                    A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+                                    A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+                                    A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+                                    A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+      enddo
+    enddo
+
+#ifndef FORCE_VECTORIZATION
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+         ! get derivatives of potential with respect to x, y and z
+          xixl = xix(i,j,k,ispec)
+          xiyl = xiy(i,j,k,ispec)
+          xizl = xiz(i,j,k,ispec)
+          etaxl = etax(i,j,k,ispec)
+          etayl = etay(i,j,k,ispec)
+          etazl = etaz(i,j,k,ispec)
+          gammaxl = gammax(i,j,k,ispec)
+          gammayl = gammay(i,j,k,ispec)
+          gammazl = gammaz(i,j,k,ispec)
+          jacobianl = jacobian(i,j,k,ispec)
+
+          ! derivatives of potential
+          dpotentialdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
+          dpotentialdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
+          dpotentialdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
+
+          ! density (reciproc)
+          rho_invl = 1.0_CUSTOM_REAL / rhostore(i,j,k,ispec)
+
+          ! for acoustic medium
+          ! also add GLL integration weights
+          tempx1(i,j,k) = rho_invl * jacobianl* &
+                        (xixl*dpotentialdxl + xiyl*dpotentialdyl + xizl*dpotentialdzl)
+          tempx2(i,j,k) = rho_invl * jacobianl* &
+                        (etaxl*dpotentialdxl + etayl*dpotentialdyl + etazl*dpotentialdzl)
+          tempx3(i,j,k) = rho_invl * jacobianl* &
+                        (gammaxl*dpotentialdxl + gammayl*dpotentialdyl + gammazl*dpotentialdzl)
+        enddo
+      enddo
+    enddo
+#else
+    do ijk = 1,NGLLCUBE
+         ! get derivatives of potential with respect to x, y and z
+          xixl = xix(ijk,1,1,ispec)
+          xiyl = xiy(ijk,1,1,ispec)
+          xizl = xiz(ijk,1,1,ispec)
+          etaxl = etax(ijk,1,1,ispec)
+          etayl = etay(ijk,1,1,ispec)
+          etazl = etaz(ijk,1,1,ispec)
+          gammaxl = gammax(ijk,1,1,ispec)
+          gammayl = gammay(ijk,1,1,ispec)
+          gammazl = gammaz(ijk,1,1,ispec)
+          jacobianl = jacobian(ijk,1,1,ispec)
+
+          ! derivatives of potential
+          dpotentialdxl = xixl*tempx1(ijk,1,1) + etaxl*tempx2(ijk,1,1) + gammaxl*tempx3(ijk,1,1)
+          dpotentialdyl = xiyl*tempx1(ijk,1,1) + etayl*tempx2(ijk,1,1) + gammayl*tempx3(ijk,1,1)
+          dpotentialdzl = xizl*tempx1(ijk,1,1) + etazl*tempx2(ijk,1,1) + gammazl*tempx3(ijk,1,1)
+
+          ! density (reciproc)
+          rho_invl = 1.0_CUSTOM_REAL / rhostore(ijk,1,1,ispec)
+
+          ! for acoustic medium
+          ! also add GLL integration weights
+          tempx1(ijk,1,1) = rho_invl * jacobianl* &
+                        (xixl*dpotentialdxl + xiyl*dpotentialdyl + xizl*dpotentialdzl)
+          tempx2(ijk,1,1) = rho_invl * jacobianl* &
+                        (etaxl*dpotentialdxl + etayl*dpotentialdyl + etazl*dpotentialdzl)
+          tempx3(ijk,1,1) = rho_invl * jacobianl* &
+                        (gammaxl*dpotentialdxl + gammayl*dpotentialdyl + gammazl*dpotentialdzl)
+    enddo
+#endif
+
+    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+    ! for incompressible fluid flow, Cambridge University Press (2002),
+    ! pages 386 and 389 and Figure 8.3.1
+    do j=1,m2
+      do i=1,m1
+        E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
+                                hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
+                                hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
+                                hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
+                                hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
+      enddo
+    enddo
+
+    do k = 1,NGLLX
+      do j=1,m1
+        do i=1,m1
+          newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
+                             tempx2(i,2,k)*hprimewgll_xx(2,j) + &
+                             tempx2(i,3,k)*hprimewgll_xx(3,j) + &
+                             tempx2(i,4,k)*hprimewgll_xx(4,j) + &
+                             tempx2(i,5,k)*hprimewgll_xx(5,j)
+        enddo
+      enddo
+    enddo
+
+    do j=1,m1
+      do i=1,m2
+        E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+                                    C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+                                    C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+                                    C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+                                    C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+      enddo
+    enddo
+
+
+    ! second double-loop over GLL to compute all the terms
+#ifndef FORCE_VECTORIZATION
+    do k = 1,NGLLZ
+      do j = 1,NGLLZ
+        do i = 1,NGLLX
+
+          ! sum contributions from each element to the global values
+          iglob = ibool(i,j,k,ispec)
+
+          potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) - (wgllwgll_yz_3D(i,j,k)*newtempx1(i,j,k) &
+                       + wgllwgll_xz_3D(i,j,k)*newtempx2(i,j,k) + wgllwgll_xy_3D(i,j,k)*newtempx3(i,j,k))
+
+        enddo
+      enddo
+    enddo
+#else
+! we can force vectorization using a compiler directive here because we know that there is no dependency
+! inside a given spectral element, since all the global points of a local elements are different by definition
+! (only common points between different elements can be the same)
+!DIR$ IVDEP
+    do ijk = 1,NGLLCUBE
+          ! sum contributions from each element to the global values
+          iglob = ibool(ijk,1,1,ispec)
+          potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) - (wgllwgll_yz_3D(ijk,1,1)*newtempx1(ijk,1,1) &
+                       + wgllwgll_xz_3D(ijk,1,1)*newtempx2(ijk,1,1) + wgllwgll_xy_3D(ijk,1,1)*newtempx3(ijk,1,1))
+    enddo
+#endif
+
+  enddo ! end of loop over all spectral elements
+
+  end subroutine compute_forces_acoustic_Dev
+

Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_Dev.f90	2013-03-19 23:24:19 UTC (rev 21586)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_Dev.f90	2013-03-20 01:37:35 UTC (rev 21587)
@@ -1,246 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 1
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-!                             July 2012
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! for acoustic solver
-
-  subroutine compute_forces_acoustic_Dev(iphase,NSPEC_AB,NGLOB_AB, &
-                        potential_acoustic,potential_dot_dot_acoustic, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        rhostore,jacobian,ibool, &
-                        num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
-                        phase_ispec_inner_acoustic)
-
-! computes forces for acoustic elements
-!
-! note that pressure is defined as:
-!     p = - Chi_dot_dot
-!
-  use specfem_par,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,TINYVAL_SNGL,ABSORB_USE_PML,ABSORBING_CONDITIONS,PML_CONDITIONS,m1,m2
-
-  implicit none
-
-  integer :: NSPEC_AB,NGLOB_AB
-
-  ! acoustic potentials
-  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: &
-        potential_acoustic,potential_dot_dot_acoustic
-
-  ! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        rhostore,jacobian
-
-  ! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprimewgll_xx,hprimewgll_xxT
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-  integer :: iphase
-  integer :: num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic
-  integer, dimension(num_phase_ispec_acoustic,2) :: phase_ispec_inner_acoustic
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: newtempx1,newtempx2,newtempx3
-
-  real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-  real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
-  real(kind=CUSTOM_REAL) :: rho_invl
-
-  integer :: ispec,iglob,i,j,k,ispec_p,num_elements
-
-  ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: chi_elem
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,m2) :: B1_m1_m2_5points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: C1_m1_m2_5points
-  real(kind=CUSTOM_REAL), dimension(m1,m2) :: E1_m1_m2_5points
-
-  equivalence(chi_elem,B1_m1_m2_5points)
-  equivalence(tempx1,C1_m1_m2_5points)
-  equivalence(newtempx1,E1_m1_m2_5points)
-
-  real(kind=CUSTOM_REAL), dimension(m2,NGLLX) :: A1_mxm_m2_m1_5points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: C1_mxm_m2_m1_5points
-  real(kind=CUSTOM_REAL), dimension(m2,m1) :: E1_mxm_m2_m1_5points
-
-  equivalence(chi_elem,A1_mxm_m2_m1_5points)
-  equivalence(tempx3,C1_mxm_m2_m1_5points)
-  equivalence(newtempx3,E1_mxm_m2_m1_5points)
-
-  if( iphase == 1 ) then
-    num_elements = nspec_outer_acoustic
-  else
-    num_elements = nspec_inner_acoustic
-  endif
-
-  ! loop over spectral elements
-  do ispec_p = 1,num_elements
-
-    ispec = phase_ispec_inner_acoustic(ispec_p,iphase)
-
-    ! gets values for element
-    do k=1,NGLLZ
-      do j=1,NGLLY
-        do i=1,NGLLX
-          chi_elem(i,j,k) = potential_acoustic(ibool(i,j,k,ispec))
-        enddo
-      enddo
-    enddo
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-    do j=1,m2
-      do i=1,m1
-        C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
-                                hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
-                                hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
-                                hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
-                                hprime_xx(i,5)*B1_m1_m2_5points(5,j)
-      enddo
-    enddo
-
-    do k = 1,NGLLX
-      do j=1,m1
-        do i=1,m1
-          tempx2(i,j,k) = chi_elem(i,1,k)*hprime_xxT(1,j) + &
-                          chi_elem(i,2,k)*hprime_xxT(2,j) + &
-                          chi_elem(i,3,k)*hprime_xxT(3,j) + &
-                          chi_elem(i,4,k)*hprime_xxT(4,j) + &
-                          chi_elem(i,5,k)*hprime_xxT(5,j)
-        enddo
-      enddo
-    enddo
-
-    do j=1,m1
-      do i=1,m2
-        C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
-                                    A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
-                                    A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
-                                    A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
-                                    A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-      enddo
-    enddo
-
-
-    do k=1,NGLLZ
-      do j=1,NGLLY
-        do i=1,NGLLX
-
-         ! get derivatives of potential with respect to x, y and z
-          xixl = xix(i,j,k,ispec)
-          xiyl = xiy(i,j,k,ispec)
-          xizl = xiz(i,j,k,ispec)
-          etaxl = etax(i,j,k,ispec)
-          etayl = etay(i,j,k,ispec)
-          etazl = etaz(i,j,k,ispec)
-          gammaxl = gammax(i,j,k,ispec)
-          gammayl = gammay(i,j,k,ispec)
-          gammazl = gammaz(i,j,k,ispec)
-          jacobianl = jacobian(i,j,k,ispec)
-
-          ! derivatives of potential
-          dpotentialdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
-          dpotentialdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
-          dpotentialdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
-          ! density (reciproc)
-          rho_invl = 1.0_CUSTOM_REAL / rhostore(i,j,k,ispec)
-
-          ! for acoustic medium
-          ! also add GLL integration weights
-          tempx1(i,j,k) = rho_invl * jacobianl* &
-                        (xixl*dpotentialdxl + xiyl*dpotentialdyl + xizl*dpotentialdzl)
-          tempx2(i,j,k) = rho_invl * jacobianl* &
-                        (etaxl*dpotentialdxl + etayl*dpotentialdyl + etazl*dpotentialdzl)
-          tempx3(i,j,k) = rho_invl * jacobianl* &
-                        (gammaxl*dpotentialdxl + gammayl*dpotentialdyl + gammazl*dpotentialdzl)
-        enddo
-      enddo
-    enddo
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-    do j=1,m2
-      do i=1,m1
-        E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
-                                hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
-                                hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
-                                hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
-                                hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
-      enddo
-    enddo
-
-    do k = 1,NGLLX
-      do j=1,m1
-        do i=1,m1
-          newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
-                             tempx2(i,2,k)*hprimewgll_xx(2,j) + &
-                             tempx2(i,3,k)*hprimewgll_xx(3,j) + &
-                             tempx2(i,4,k)*hprimewgll_xx(4,j) + &
-                             tempx2(i,5,k)*hprimewgll_xx(5,j)
-        enddo
-      enddo
-    enddo
-
-    do j=1,m1
-      do i=1,m2
-        E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
-                                    C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
-                                    C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
-                                    C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
-                                    C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
-      enddo
-    enddo
-
-
-    ! second double-loop over GLL to compute all the terms
-    do k = 1,NGLLZ
-      do j = 1,NGLLZ
-        do i = 1,NGLLX
-
-          ! sum contributions from each element to the global values
-          iglob = ibool(i,j,k,ispec)
-
-          potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) - (wgllwgll_yz(j,k)*newtempx1(i,j,k) &
-                       + wgllwgll_xz(i,k)*newtempx2(i,j,k) + wgllwgll_xy(i,j)*newtempx3(i,j,k))
-
-        enddo
-      enddo
-    enddo
-
-  enddo ! end of loop over all spectral elements
-
-  end subroutine compute_forces_acoustic_Dev
-

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90	2013-03-19 23:24:19 UTC (rev 21586)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90	2013-03-20 01:37:35 UTC (rev 21587)
@@ -102,7 +102,7 @@
                         potential_acoustic,potential_dot_dot_acoustic, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                         hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                        wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D, &
                         rhostore,jacobian,ibool, &
                         num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
                         phase_ispec_inner_acoustic)
@@ -128,7 +128,7 @@
                         b_potential_acoustic,b_potential_dot_dot_acoustic, &
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                         hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                        wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D, &
                         rhostore,jacobian,ibool, &
                         num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
                         phase_ispec_inner_acoustic)

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/setup_GLL_points.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/setup_GLL_points.f90	2013-03-19 23:24:19 UTC (rev 21586)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/setup_GLL_points.f90	2013-03-20 01:37:35 UTC (rev 21587)
@@ -30,7 +30,7 @@
 
   use specfem_par
   implicit none
-  integer :: i,j,ier
+  integer :: i,j,k,ier
 
   if(myrank == 0) then
     write(IMAIN,*) '******************************************'
@@ -53,6 +53,17 @@
     enddo
   enddo
 
+! define a 3D extension in order to be able to force vectorization in the compute_forces routines
+  do k = 1,NGLLZ
+    do j = 1,NGLLY
+      do i = 1,NGLLX
+        wgllwgll_yz_3D(i,j,k) = wgllwgll_yz(j,k)
+        wgllwgll_xz_3D(i,j,k) = wgllwgll_xz(i,k)
+        wgllwgll_xy_3D(i,j,k) = wgllwgll_xy(i,j)
+      enddo
+    enddo
+  enddo
+
 ! allocate 1-D Lagrange interpolators and derivatives
   allocate(hxir(NGLLX), &
           hpxir(NGLLX), &

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90	2013-03-19 23:24:19 UTC (rev 21586)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90	2013-03-20 01:37:35 UTC (rev 21587)
@@ -144,6 +144,8 @@
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
   real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
 
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: wgllwgll_xy_3D,wgllwgll_xz_3D,wgllwgll_yz_3D
+
 ! Lagrange interpolators at receivers
   double precision, dimension(:), allocatable :: hxir,hetar,hpxir,hpetar,hgammar,hpgammar
   double precision, dimension(:,:), allocatable :: hxir_store,hetar_store,hgammar_store



More information about the CIG-COMMITS mailing list