[cig-commits] r21589 - seismo/3D/SPECFEM3D/trunk/src/specfem3D

dkomati1 at geodynamics.org dkomati1 at geodynamics.org
Tue Mar 19 19:27:40 PDT 2013


Author: dkomati1
Date: 2013-03-19 19:27:39 -0700 (Tue, 19 Mar 2013)
New Revision: 21589

Added:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_Dev.F90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.F90
Removed:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_Dev.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90
Modified:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
Log:
renamed all the files that will need to be vectorized


Copied: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_Dev.F90 (from rev 21588, seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_Dev.f90)
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_Dev.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_Dev.F90	2013-03-20 02:27:39 UTC (rev 21589)
@@ -0,0 +1,888 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 1
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+!                             July 2012
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! Deville routine for NGLL == 5 (default)
+
+  subroutine compute_forces_viscoelastic_Dev_5p( iphase ,NSPEC_AB,NGLOB_AB, &
+                                    displ,veloc,accel, &
+                                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                                    hprime_xx,hprime_xxT, &
+                                    hprimewgll_xx,hprimewgll_xxT, &
+                                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                                    kappastore,mustore,jacobian,ibool, &
+                                    ATTENUATION,deltat, &
+                                    one_minus_sum_beta,factor_common,&
+                                    one_minus_sum_beta_kappa,factor_common_kappa,& !ZN
+                                    alphaval,betaval,gammaval,&      !ZN
+                                    NSPEC_ATTENUATION_AB,NSPEC_ATTENUATION_AB_Kappa, &
+!ZN                                    R_xx,R_yy,R_xy,R_xz,R_yz, &
+                                    R_trace,R_xx,R_yy,R_xy,R_xz,R_yz, &  !ZN
+!ZN                                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                                    epsilondev_trace,epsilondev_xx,epsilondev_yy,epsilondev_xy, &  !ZN
+                                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                                    ANISOTROPY,NSPEC_ANISO, &
+                                    c11store,c12store,c13store,c14store,c15store,c16store,&
+                                    c22store,c23store,c24store,c25store,c26store,c33store,&
+                                    c34store,c35store,c36store,c44store,c45store,c46store,&
+                                    c55store,c56store,c66store, &
+                                    SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
+                                    is_moho_top,is_moho_bot, &
+                                    dsdx_top,dsdx_bot, &
+                                    ispec2D_moho_top,ispec2D_moho_bot, &
+                                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                                    phase_ispec_inner_elastic)
+
+
+! computes elastic tensor term
+
+  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
+                      N_SLS,SAVE_MOHO_MESH, &
+                      ONE_THIRD,FOUR_THIRDS,m1,m2,FULL_ATTENUATION_SOLID,CONST_Q_KAPPA,IOUT
+  use fault_solver_dynamic, only : Kelvin_Voigt_eta
+
+  implicit none
+
+  integer :: NSPEC_AB,NGLOB_AB
+
+! displacement, velocity and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,veloc,accel
+
+! time step
+  real(kind=CUSTOM_REAL) :: deltat
+
+! arrays with mesh parameters per slice
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+        kappastore,mustore,jacobian
+
+! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,5) :: hprime_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(5,NGLLX) :: hprime_xxT,hprimewgll_xx
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+! memory variables and standard linear solids for attenuation
+  logical :: ATTENUATION
+  logical :: COMPUTE_AND_STORE_STRAIN
+  integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
+  integer :: NSPEC_ATTENUATION_AB,NSPEC_ATTENUATION_AB_Kappa
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
+  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB_Kappa) :: one_minus_sum_beta_kappa   !ZN
+  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB_Kappa) :: factor_common_kappa  !ZN
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
+      R_xx,R_yy,R_xy,R_xz,R_yz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB_Kappa,N_SLS) :: R_trace  !ZN
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
+       epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB_Kappa) :: epsilondev_trace  !ZN
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
+
+! anisotropy
+  logical :: ANISOTROPY
+  integer :: NSPEC_ANISO
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
+            c11store,c12store,c13store,c14store,c15store,c16store, &
+            c22store,c23store,c24store,c25store,c26store,c33store, &
+            c34store,c35store,c36store,c44store,c45store,c46store, &
+            c55store,c56store,c66store
+
+  integer :: iphase
+  integer :: num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic
+  integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
+
+! adjoint simulations
+  integer :: SIMULATION_TYPE
+  integer :: NSPEC_BOUN,NSPEC2D_MOHO
+
+  ! moho kernel
+  real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
+    dsdx_top,dsdx_bot
+  logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
+  integer :: ispec2D_moho_top, ispec2D_moho_bot
+
+! local parameters
+  real(kind=CUSTOM_REAL), dimension(5,5,5) :: dummyx_loc,dummyy_loc,dummyz_loc, &
+    newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
+  real(kind=CUSTOM_REAL), dimension(5,5,5) :: &
+    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+
+  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
+  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
+
+  ! manually inline the calls to the Deville et al. (2002) routines
+  real(kind=CUSTOM_REAL), dimension(5,25) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
+  real(kind=CUSTOM_REAL), dimension(5,25) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
+  real(kind=CUSTOM_REAL), dimension(5,25) :: E1_m1_m2_5points,E2_m1_m2_5points,E3_m1_m2_5points
+
+  equivalence(dummyx_loc,B1_m1_m2_5points)
+  equivalence(dummyy_loc,B2_m1_m2_5points)
+  equivalence(dummyz_loc,B3_m1_m2_5points)
+  equivalence(tempx1,C1_m1_m2_5points)
+  equivalence(tempy1,C2_m1_m2_5points)
+  equivalence(tempz1,C3_m1_m2_5points)
+  equivalence(newtempx1,E1_m1_m2_5points)
+  equivalence(newtempy1,E2_m1_m2_5points)
+  equivalence(newtempz1,E3_m1_m2_5points)
+
+  real(kind=CUSTOM_REAL), dimension(5,5,5) :: &
+    tempx1_att,tempx2_att,tempx3_att,tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
+  real(kind=CUSTOM_REAL), dimension(5,5,5) :: dummyx_loc_att,dummyy_loc_att,dummyz_loc_att
+  real(kind=CUSTOM_REAL), dimension(5,25) :: B1_m1_m2_5points_att,B2_m1_m2_5points_att,B3_m1_m2_5points_att
+  real(kind=CUSTOM_REAL), dimension(5,25) :: C1_m1_m2_5points_att,C2_m1_m2_5points_att,C3_m1_m2_5points_att
+
+  equivalence(dummyx_loc_att,B1_m1_m2_5points_att)
+  equivalence(dummyy_loc_att,B2_m1_m2_5points_att)
+  equivalence(dummyz_loc_att,B3_m1_m2_5points_att)
+  equivalence(tempx1_att,C1_m1_m2_5points_att)
+  equivalence(tempy1_att,C2_m1_m2_5points_att)
+  equivalence(tempz1_att,C3_m1_m2_5points_att)
+
+  real(kind=CUSTOM_REAL), dimension(25,5) :: &
+    A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
+  real(kind=CUSTOM_REAL), dimension(25,5) :: &
+    C1_mxm_m2_m1_5points,C2_mxm_m2_m1_5points,C3_mxm_m2_m1_5points
+  real(kind=CUSTOM_REAL), dimension(25,5) :: &
+    E1_mxm_m2_m1_5points,E2_mxm_m2_m1_5points,E3_mxm_m2_m1_5points
+
+  equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
+  equivalence(dummyy_loc,A2_mxm_m2_m1_5points)
+  equivalence(dummyz_loc,A3_mxm_m2_m1_5points)
+  equivalence(tempx3,C1_mxm_m2_m1_5points)
+  equivalence(tempy3,C2_mxm_m2_m1_5points)
+  equivalence(tempz3,C3_mxm_m2_m1_5points)
+  equivalence(newtempx3,E1_mxm_m2_m1_5points)
+  equivalence(newtempy3,E2_mxm_m2_m1_5points)
+  equivalence(newtempz3,E3_mxm_m2_m1_5points)
+
+  real(kind=CUSTOM_REAL), dimension(25,5) :: &
+    A1_mxm_m2_m1_5points_att,A2_mxm_m2_m1_5points_att,A3_mxm_m2_m1_5points_att
+  real(kind=CUSTOM_REAL), dimension(25,5) :: &
+    C1_mxm_m2_m1_5points_att,C2_mxm_m2_m1_5points_att,C3_mxm_m2_m1_5points_att
+
+  equivalence(dummyx_loc_att,A1_mxm_m2_m1_5points_att)
+  equivalence(dummyy_loc_att,A2_mxm_m2_m1_5points_att)
+  equivalence(dummyz_loc_att,A3_mxm_m2_m1_5points_att)
+  equivalence(tempx3_att,C1_mxm_m2_m1_5points_att)
+  equivalence(tempy3_att,C2_mxm_m2_m1_5points_att)
+  equivalence(tempz3_att,C3_mxm_m2_m1_5points_att)
+
+  ! local attenuation parameters
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_trace_loc,epsilondev_xx_loc, & !ZN
+       epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
+  real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3, & !ZN
+                         R_trace_val1,R_trace_val2,R_trace_val3 !ZN
+  real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
+  real(kind=CUSTOM_REAL) Sn,Snp1
+  real(kind=CUSTOM_REAL) templ
+
+  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
+
+  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+
+  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
+
+  real(kind=CUSTOM_REAL) fac1,fac2,fac3
+
+  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
+  real(kind=CUSTOM_REAL) kappal
+
+  ! local anisotropy parameters
+  real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
+                        c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
+
+  integer i_SLS,imodulo_N_SLS
+  integer ispec,iglob,ispec_p,num_elements
+  integer i,j,k
+
+  real(kind=CUSTOM_REAL) :: eta
+
+  imodulo_N_SLS = mod(N_SLS,3)
+
+  ! choses inner/outer elements
+  if( iphase == 1 ) then
+    num_elements = nspec_outer_elastic
+  else
+    num_elements = nspec_inner_elastic
+  endif
+
+  do ispec_p = 1,num_elements
+
+        ! returns element id from stored element list
+        ispec = phase_ispec_inner_elastic(ispec_p,iphase)
+
+        ! adjoint simulations: moho kernel
+        if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
+          if (is_moho_top(ispec)) then
+            ispec2D_moho_top = ispec2D_moho_top + 1
+          else if (is_moho_bot(ispec)) then
+            ispec2D_moho_bot = ispec2D_moho_bot + 1
+          endif
+        endif ! adjoint
+
+       ! Kelvin Voigt damping: artificial viscosity around dynamic faults
+
+        ! stores displacment values in local array
+        if (allocated(Kelvin_Voigt_eta)) then
+          eta = Kelvin_Voigt_eta(ispec)
+          do k=1,NGLLZ
+            do j=1,NGLLY
+              do i=1,NGLLX
+                iglob = ibool(i,j,k,ispec)
+                dummyx_loc(i,j,k) = displ(1,iglob) + eta*veloc(1,iglob)
+                dummyy_loc(i,j,k) = displ(2,iglob) + eta*veloc(2,iglob)
+                dummyz_loc(i,j,k) = displ(3,iglob) + eta*veloc(3,iglob)
+              enddo
+            enddo
+          enddo
+
+        else
+          do k=1,NGLLZ
+            do j=1,NGLLY
+              do i=1,NGLLX
+                iglob = ibool(i,j,k,ispec)
+                dummyx_loc(i,j,k) = displ(1,iglob)
+                dummyy_loc(i,j,k) = displ(2,iglob)
+                dummyz_loc(i,j,k) = displ(3,iglob)
+              enddo
+            enddo
+          enddo
+        endif
+
+        ! use first order Taylor expansion of displacement for local storage of stresses
+        ! at this current time step, to fix attenuation in a consistent way
+        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
+           do k=1,NGLLZ
+              do j=1,NGLLY
+                 do i=1,NGLLX
+                    iglob = ibool(i,j,k,ispec)
+                    dummyx_loc_att(i,j,k) = deltat*veloc(1,iglob)
+                    dummyy_loc_att(i,j,k) = deltat*veloc(2,iglob)
+                    dummyz_loc_att(i,j,k) = deltat*veloc(3,iglob)
+                 enddo
+              enddo
+           enddo
+        endif
+
+    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+    ! for incompressible fluid flow, Cambridge University Press (2002),
+    ! pages 386 and 389 and Figure 8.3.1
+        ! call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
+        do j=1,m2
+          do i=1,m1
+            C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
+                                  hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
+                                  hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
+                                  hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
+                                  hprime_xx(i,5)*B1_m1_m2_5points(5,j)
+            C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
+                                  hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
+                                  hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
+                                  hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
+                                  hprime_xx(i,5)*B2_m1_m2_5points(5,j)
+            C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
+                                  hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
+                                  hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
+                                  hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
+                                  hprime_xx(i,5)*B3_m1_m2_5points(5,j)
+          enddo
+        enddo
+
+        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
+           ! temporary variables used for fixing attenuation in a consistent way
+           do j=1,m2
+              do i=1,m1
+                 C1_m1_m2_5points_att(i,j) = C1_m1_m2_5points(i,j) + &
+                      hprime_xx(i,1)*B1_m1_m2_5points_att(1,j) + &
+                      hprime_xx(i,2)*B1_m1_m2_5points_att(2,j) + &
+                      hprime_xx(i,3)*B1_m1_m2_5points_att(3,j) + &
+                      hprime_xx(i,4)*B1_m1_m2_5points_att(4,j) + &
+                      hprime_xx(i,5)*B1_m1_m2_5points_att(5,j)
+
+                 C2_m1_m2_5points_att(i,j) = C2_m1_m2_5points(i,j) + &
+                      hprime_xx(i,1)*B2_m1_m2_5points_att(1,j) + &
+                      hprime_xx(i,2)*B2_m1_m2_5points_att(2,j) + &
+                      hprime_xx(i,3)*B2_m1_m2_5points_att(3,j) + &
+                      hprime_xx(i,4)*B2_m1_m2_5points_att(4,j) + &
+                      hprime_xx(i,5)*B2_m1_m2_5points_att(5,j)
+
+                 C3_m1_m2_5points_att(i,j) = C3_m1_m2_5points(i,j) + &
+                      hprime_xx(i,1)*B3_m1_m2_5points_att(1,j) + &
+                      hprime_xx(i,2)*B3_m1_m2_5points_att(2,j) + &
+                      hprime_xx(i,3)*B3_m1_m2_5points_att(3,j) + &
+                      hprime_xx(i,4)*B3_m1_m2_5points_att(4,j) + &
+                      hprime_xx(i,5)*B3_m1_m2_5points_att(5,j)
+              enddo
+           enddo
+        endif
+
+        !   call mxm_m1_m1_5points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
+        !          hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
+        do j=1,m1
+          do i=1,m1
+            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+            do k = 1,NGLLX
+              tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
+                            dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
+                            dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
+                            dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
+                            dummyx_loc(i,5,k)*hprime_xxT(5,j)
+              tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
+                            dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
+                            dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
+                            dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
+                            dummyy_loc(i,5,k)*hprime_xxT(5,j)
+              tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
+                            dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
+                            dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
+                            dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
+                            dummyz_loc(i,5,k)*hprime_xxT(5,j)
+            enddo
+          enddo
+        enddo
+
+        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
+           ! temporary variables used for fixing attenuation in a consistent way
+           do j=1,m1
+              do i=1,m1
+                 ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+                 do k = 1,NGLLX
+                    tempx2_att(i,j,k) = tempx2(i,j,k) + &
+                         dummyx_loc_att(i,1,k)*hprime_xxT(1,j) + &
+                         dummyx_loc_att(i,2,k)*hprime_xxT(2,j) + &
+                         dummyx_loc_att(i,3,k)*hprime_xxT(3,j) + &
+                         dummyx_loc_att(i,4,k)*hprime_xxT(4,j) + &
+                         dummyx_loc_att(i,5,k)*hprime_xxT(5,j)
+
+                    tempy2_att(i,j,k) = tempy2(i,j,k) + &
+                         dummyy_loc_att(i,1,k)*hprime_xxT(1,j) + &
+                         dummyy_loc_att(i,2,k)*hprime_xxT(2,j) + &
+                         dummyy_loc_att(i,3,k)*hprime_xxT(3,j) + &
+                         dummyy_loc_att(i,4,k)*hprime_xxT(4,j) + &
+                         dummyy_loc_att(i,5,k)*hprime_xxT(5,j)
+
+                    tempz2_att(i,j,k) = tempz2(i,j,k) + &
+                         dummyz_loc_att(i,1,k)*hprime_xxT(1,j) + &
+                         dummyz_loc_att(i,2,k)*hprime_xxT(2,j) + &
+                         dummyz_loc_att(i,3,k)*hprime_xxT(3,j) + &
+                         dummyz_loc_att(i,4,k)*hprime_xxT(4,j) + &
+                         dummyz_loc_att(i,5,k)*hprime_xxT(5,j)
+                 enddo
+              enddo
+           enddo
+        endif
+
+        ! call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
+        do j=1,m1
+          do i=1,m2
+            C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+                                      A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+                                      A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+                                      A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+                                      A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+            C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+                                      A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+                                      A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+                                      A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+                                      A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+            C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
+                                      A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
+                                      A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
+                                      A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
+                                      A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
+          enddo
+        enddo
+
+        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
+           ! temporary variables used for fixing attenuation in a consistent way
+           do j=1,m1
+              do i=1,m2
+                 C1_mxm_m2_m1_5points_att(i,j) = C1_mxm_m2_m1_5points(i,j) + &
+                      A1_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
+                      A1_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
+                      A1_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
+                      A1_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
+                      A1_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
+
+                 C2_mxm_m2_m1_5points_att(i,j) = C2_mxm_m2_m1_5points(i,j) + &
+                      A2_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
+                      A2_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
+                      A2_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
+                      A2_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
+                      A2_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
+
+                 C3_mxm_m2_m1_5points_att(i,j) = C3_mxm_m2_m1_5points(i,j) + &
+                      A3_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
+                      A3_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
+                      A3_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
+                      A3_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
+                      A3_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
+              enddo
+           enddo
+        endif
+
+        do k=1,NGLLZ
+          do j=1,NGLLY
+            do i=1,NGLLX
+              ! get derivatives of ux, uy and uz with respect to x, y and z
+              xixl = xix(i,j,k,ispec)
+              xiyl = xiy(i,j,k,ispec)
+              xizl = xiz(i,j,k,ispec)
+              etaxl = etax(i,j,k,ispec)
+              etayl = etay(i,j,k,ispec)
+              etazl = etaz(i,j,k,ispec)
+              gammaxl = gammax(i,j,k,ispec)
+              gammayl = gammay(i,j,k,ispec)
+              gammazl = gammaz(i,j,k,ispec)
+              jacobianl = jacobian(i,j,k,ispec)
+
+              duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
+              duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
+              duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
+
+              duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
+              duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
+              duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
+
+              duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
+              duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
+              duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
+
+              ! save strain on the Moho boundary
+              if (SAVE_MOHO_MESH ) then
+                if (is_moho_top(ispec)) then
+                  dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
+                  dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
+                  dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
+                  dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
+                  dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
+                  dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
+                  dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
+                  dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
+                  dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
+                else if (is_moho_bot(ispec)) then
+                  dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
+                  dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
+                  dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
+                  dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
+                  dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
+                  dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
+                  dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
+                  dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
+                  dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
+                endif
+              endif
+
+              ! precompute some sums to save CPU time
+              duxdxl_plus_duydyl = duxdxl + duydyl
+              duxdxl_plus_duzdzl = duxdxl + duzdzl
+              duydyl_plus_duzdzl = duydyl + duzdzl
+              duxdyl_plus_duydxl = duxdyl + duydxl
+              duzdxl_plus_duxdzl = duzdxl + duxdzl
+              duzdyl_plus_duydzl = duzdyl + duydzl
+
+              if ( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
+                 ! temporary variables used for fixing attenuation in a consistent way
+                 duxdxl_att = xixl*tempx1_att(i,j,k) + etaxl*tempx2_att(i,j,k) + gammaxl*tempx3_att(i,j,k)
+                 duxdyl_att = xiyl*tempx1_att(i,j,k) + etayl*tempx2_att(i,j,k) + gammayl*tempx3_att(i,j,k)
+                 duxdzl_att = xizl*tempx1_att(i,j,k) + etazl*tempx2_att(i,j,k) + gammazl*tempx3_att(i,j,k)
+
+                 duydxl_att = xixl*tempy1_att(i,j,k) + etaxl*tempy2_att(i,j,k) + gammaxl*tempy3_att(i,j,k)
+                 duydyl_att = xiyl*tempy1_att(i,j,k) + etayl*tempy2_att(i,j,k) + gammayl*tempy3_att(i,j,k)
+                 duydzl_att = xizl*tempy1_att(i,j,k) + etazl*tempy2_att(i,j,k) + gammazl*tempy3_att(i,j,k)
+
+                 duzdxl_att = xixl*tempz1_att(i,j,k) + etaxl*tempz2_att(i,j,k) + gammaxl*tempz3_att(i,j,k)
+                 duzdyl_att = xiyl*tempz1_att(i,j,k) + etayl*tempz2_att(i,j,k) + gammayl*tempz3_att(i,j,k)
+                 duzdzl_att = xizl*tempz1_att(i,j,k) + etazl*tempz2_att(i,j,k) + gammazl*tempz3_att(i,j,k)
+
+                 ! precompute some sums to save CPU time
+                 duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
+                 duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
+                 duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
+
+                 ! compute deviatoric strain
+                 templ = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
+                 if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
+                 if(FULL_ATTENUATION_SOLID) epsilondev_trace_loc(i,j,k) = 3.0 * templ
+                 epsilondev_xx_loc(i,j,k) = duxdxl_att - templ
+                 epsilondev_yy_loc(i,j,k) = duydyl_att - templ
+                 epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
+                 epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
+                 epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
+              else
+                 ! computes deviatoric strain attenuation and/or for kernel calculations
+                 if (COMPUTE_AND_STORE_STRAIN) then
+                    templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+                    if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
+                    if(FULL_ATTENUATION_SOLID) epsilondev_trace_loc(i,j,k) = 3.0 * templ
+                    epsilondev_xx_loc(i,j,k) = duxdxl - templ
+                    epsilondev_yy_loc(i,j,k) = duydyl - templ
+                    epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
+                    epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
+                    epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
+                 endif
+              endif
+
+              kappal = kappastore(i,j,k,ispec)
+              mul = mustore(i,j,k,ispec)
+
+              ! attenuation
+              if(ATTENUATION) then
+                ! use unrelaxed parameters if attenuation
+                mul  = mul * one_minus_sum_beta(i,j,k,ispec)
+                if(FULL_ATTENUATION_SOLID) then  !ZN
+                   kappal  = kappal * one_minus_sum_beta_kappa(i,j,k,ispec)  !ZN
+                endif  !ZN
+              endif
+
+  ! full anisotropic case, stress calculations
+              if(ANISOTROPY) then
+                c11 = c11store(i,j,k,ispec)
+                c12 = c12store(i,j,k,ispec)
+                c13 = c13store(i,j,k,ispec)
+                c14 = c14store(i,j,k,ispec)
+                c15 = c15store(i,j,k,ispec)
+                c16 = c16store(i,j,k,ispec)
+                c22 = c22store(i,j,k,ispec)
+                c23 = c23store(i,j,k,ispec)
+                c24 = c24store(i,j,k,ispec)
+                c25 = c25store(i,j,k,ispec)
+                c26 = c26store(i,j,k,ispec)
+                c33 = c33store(i,j,k,ispec)
+                c34 = c34store(i,j,k,ispec)
+                c35 = c35store(i,j,k,ispec)
+                c36 = c36store(i,j,k,ispec)
+                c44 = c44store(i,j,k,ispec)
+                c45 = c45store(i,j,k,ispec)
+                c46 = c46store(i,j,k,ispec)
+                c55 = c55store(i,j,k,ispec)
+                c56 = c56store(i,j,k,ispec)
+                c66 = c66store(i,j,k,ispec)
+
+                sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
+                          c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
+                sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
+                          c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
+                sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
+                          c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
+                sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
+                          c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
+                sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
+                          c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
+                sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
+                          c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
+
+              else
+
+  ! isotropic case
+                lambdalplus2mul = kappal + FOUR_THIRDS * mul
+                lambdal = lambdalplus2mul - 2.*mul
+
+                ! compute stress sigma
+                sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
+                sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
+                sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
+
+                sigma_xy = mul*duxdyl_plus_duydxl
+                sigma_xz = mul*duzdxl_plus_duxdzl
+                sigma_yz = mul*duzdyl_plus_duydzl
+
+              endif ! ANISOTROPY
+
+              ! subtract memory variables if attenuation
+              if(ATTENUATION) then
+! way 1
+!                do i_sls = 1,N_SLS
+!                  R_xx_val = R_xx(i,j,k,ispec,i_sls)
+!                  R_yy_val = R_yy(i,j,k,ispec,i_sls)
+!                  sigma_xx = sigma_xx - R_xx_val
+!                  sigma_yy = sigma_yy - R_yy_val
+!                  sigma_zz = sigma_zz + R_xx_val + R_yy_val
+!                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
+!                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
+!                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
+!                enddo
+
+! way 2
+! note: this should help compilers to pipeline the code and make better use of the cache;
+!          depending on compilers, it can further decrease the computation time by ~ 30%.
+!          by default, N_SLS = 3, therefore we take steps of 3
+              if(imodulo_N_SLS >= 1) then
+                do i_sls = 1,imodulo_N_SLS
+                  if(FULL_ATTENUATION_SOLID) then !! ZN: for performance, it would be better to avoid "if" statements inside loops
+                    R_trace_val1 = R_trace(i,j,k,ispec,i_sls)
+                  else
+                    R_trace_val1 = 0.
+                  endif
+                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
+                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
+                  sigma_xx = sigma_xx - R_xx_val1 - R_trace_val1
+                  sigma_yy = sigma_yy - R_yy_val1 - R_trace_val1
+                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1 - R_trace_val1
+                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
+                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
+                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
+                enddo
+              endif
+
+              if(N_SLS >= imodulo_N_SLS+1) then
+                do i_sls = imodulo_N_SLS+1,N_SLS,3
+                  if(FULL_ATTENUATION_SOLID) then !! ZN: for performance, it would be better to avoid "if" statements inside loops
+                    R_trace_val1 = R_trace(i,j,k,ispec,i_sls)
+                  else
+                    R_trace_val1 = 0.
+                  endif
+                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
+                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
+                  sigma_xx = sigma_xx - R_xx_val1 - R_trace_val1
+                  sigma_yy = sigma_yy - R_yy_val1 - R_trace_val1
+                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1 - R_trace_val1
+                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
+                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
+                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
+                  if(FULL_ATTENUATION_SOLID) then
+                    R_trace_val2 = R_trace(i,j,k,ispec,i_sls+1)
+                  else
+                    R_trace_val2 = 0.
+                  endif
+                  R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
+                  R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
+                  sigma_xx = sigma_xx - R_xx_val2 - R_trace_val2
+                  sigma_yy = sigma_yy - R_yy_val2 - R_trace_val2
+                  sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2 - R_trace_val2
+                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
+                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
+                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
+
+                  if(FULL_ATTENUATION_SOLID) then
+                    R_trace_val3 = R_trace(i,j,k,ispec,i_sls+2)
+                  else
+                    R_trace_val3 = 0.
+                  endif
+                  R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
+                  R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
+                  sigma_xx = sigma_xx - R_xx_val3 - R_trace_val3
+                  sigma_yy = sigma_yy - R_yy_val3 - R_trace_val3
+                  sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3 - R_trace_val3
+                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
+                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
+                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
+                enddo
+              endif
+
+
+              endif
+
+            ! define symmetric components of sigma
+            sigma_yx = sigma_xy
+            sigma_zx = sigma_xz
+            sigma_zy = sigma_yz
+
+            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
+            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
+            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
+            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
+
+            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
+            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
+            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
+
+            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
+            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
+            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
+
+            enddo
+          enddo
+        enddo
+
+    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+    ! for incompressible fluid flow, Cambridge University Press (2002),
+    ! pages 386 and 389 and Figure 8.3.1
+        ! call mxm_m1_m2_5points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
+        do j=1,m2
+          do i=1,m1
+            E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
+                                  hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
+                                  hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
+                                  hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
+                                  hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
+            E2_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_5points(1,j) + &
+                                  hprimewgll_xxT(i,2)*C2_m1_m2_5points(2,j) + &
+                                  hprimewgll_xxT(i,3)*C2_m1_m2_5points(3,j) + &
+                                  hprimewgll_xxT(i,4)*C2_m1_m2_5points(4,j) + &
+                                  hprimewgll_xxT(i,5)*C2_m1_m2_5points(5,j)
+            E3_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_5points(1,j) + &
+                                  hprimewgll_xxT(i,2)*C3_m1_m2_5points(2,j) + &
+                                  hprimewgll_xxT(i,3)*C3_m1_m2_5points(3,j) + &
+                                  hprimewgll_xxT(i,4)*C3_m1_m2_5points(4,j) + &
+                                  hprimewgll_xxT(i,5)*C3_m1_m2_5points(5,j)
+          enddo
+        enddo
+
+        !   call mxm_m1_m1_5points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
+        !         hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
+        do i=1,m1
+          do j=1,m1
+            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+            do k = 1,NGLLX
+              newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
+                               tempx2(i,2,k)*hprimewgll_xx(2,j) + &
+                               tempx2(i,3,k)*hprimewgll_xx(3,j) + &
+                               tempx2(i,4,k)*hprimewgll_xx(4,j) + &
+                               tempx2(i,5,k)*hprimewgll_xx(5,j)
+              newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
+                               tempy2(i,2,k)*hprimewgll_xx(2,j) + &
+                               tempy2(i,3,k)*hprimewgll_xx(3,j) + &
+                               tempy2(i,4,k)*hprimewgll_xx(4,j) + &
+                               tempy2(i,5,k)*hprimewgll_xx(5,j)
+              newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
+                               tempz2(i,2,k)*hprimewgll_xx(2,j) + &
+                               tempz2(i,3,k)*hprimewgll_xx(3,j) + &
+                               tempz2(i,4,k)*hprimewgll_xx(4,j) + &
+                               tempz2(i,5,k)*hprimewgll_xx(5,j)
+            enddo
+          enddo
+        enddo
+
+        ! call mxm_m2_m1_5points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
+        do j=1,m1
+          do i=1,m2
+            E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+                                      C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+                                      C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+                                      C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+                                      C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+            E2_mxm_m2_m1_5points(i,j) = C2_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+                                      C2_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+                                      C2_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+                                      C2_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+                                      C2_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+            E3_mxm_m2_m1_5points(i,j) = C3_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
+                                      C3_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
+                                      C3_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
+                                      C3_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
+                                      C3_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
+          enddo
+        enddo
+
+        do k=1,NGLLZ
+          do j=1,NGLLY
+            do i=1,NGLLX
+
+              fac1 = wgllwgll_yz(j,k)
+              fac2 = wgllwgll_xz(i,k)
+              fac3 = wgllwgll_xy(i,j)
+
+              ! sum contributions from each element to the global mesh using indirect addressing
+              iglob = ibool(i,j,k,ispec)
+              accel(1,iglob) = accel(1,iglob) - fac1*newtempx1(i,j,k) - &
+                                fac2*newtempx2(i,j,k) - fac3*newtempx3(i,j,k)
+              accel(2,iglob) = accel(2,iglob) - fac1*newtempy1(i,j,k) - &
+                                fac2*newtempy2(i,j,k) - fac3*newtempy3(i,j,k)
+              accel(3,iglob) = accel(3,iglob) - fac1*newtempz1(i,j,k) - &
+                                fac2*newtempz2(i,j,k) - fac3*newtempz3(i,j,k)
+
+              !  update memory variables based upon the Runge-Kutta scheme
+              if(ATTENUATION) then
+
+                 ! use Runge-Kutta scheme to march in time
+                 do i_sls = 1,N_SLS
+
+                    alphaval_loc = alphaval(i_sls)
+                    betaval_loc = betaval(i_sls)
+                    gammaval_loc = gammaval(i_sls)
+
+                    if(FULL_ATTENUATION_SOLID) then
+                      ! term in trace  !ZN
+                      factor_loc = kappastore(i,j,k,ispec) * factor_common_kappa(i_sls,i,j,k,ispec)  !ZN
+
+                      Sn   = factor_loc * epsilondev_trace(i,j,k,ispec)  !ZN
+                      Snp1   = factor_loc * epsilondev_trace_loc(i,j,k)  !ZN
+                      R_trace(i,j,k,ispec,i_sls) = alphaval_loc * R_trace(i,j,k,ispec,i_sls) + &  !ZN
+                                        betaval_loc * Sn + gammaval_loc * Snp1  !ZN
+                    endif
+
+                    ! term in xx yy zz xy xz yz
+                    factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
+
+                    ! term in xx
+                    Sn   = factor_loc * epsilondev_xx(i,j,k,ispec)
+                    Snp1   = factor_loc * epsilondev_xx_loc(i,j,k)
+                    R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
+                                      betaval_loc * Sn + gammaval_loc * Snp1
+                    ! term in yy
+                    Sn   = factor_loc * epsilondev_yy(i,j,k,ispec)
+                    Snp1   = factor_loc * epsilondev_yy_loc(i,j,k)
+                    R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
+                                      betaval_loc * Sn + gammaval_loc * Snp1
+                    ! term in zz not computed since zero trace
+                    ! term in xy
+                    Sn   = factor_loc * epsilondev_xy(i,j,k,ispec)
+                    Snp1   = factor_loc * epsilondev_xy_loc(i,j,k)
+                    R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
+                                      betaval_loc * Sn + gammaval_loc * Snp1
+                    ! term in xz
+                    Sn   = factor_loc * epsilondev_xz(i,j,k,ispec)
+                    Snp1   = factor_loc * epsilondev_xz_loc(i,j,k)
+                    R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
+                                      betaval_loc * Sn + gammaval_loc * Snp1
+                    ! term in yz
+                    Sn   = factor_loc * epsilondev_yz(i,j,k,ispec)
+                    Snp1   = factor_loc * epsilondev_yz_loc(i,j,k)
+                    R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
+                                      betaval_loc * Sn + gammaval_loc * Snp1
+                 enddo   ! end of loop on memory variables
+
+              endif  !  end attenuation
+
+            enddo
+          enddo
+        enddo
+
+        ! save deviatoric strain for Runge-Kutta scheme
+        if ( COMPUTE_AND_STORE_STRAIN ) then
+          if(FULL_ATTENUATION_SOLID) epsilondev_trace(:,:,:,ispec) = epsilondev_trace_loc(:,:,:)
+          epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
+          epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
+          epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
+          epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
+          epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
+        endif
+
+  enddo  ! spectral element loop
+
+  end subroutine compute_forces_viscoelastic_Dev_5p
+

Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_Dev.f90	2013-03-20 02:13:06 UTC (rev 21588)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_Dev.f90	2013-03-20 02:27:39 UTC (rev 21589)
@@ -1,888 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 1
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-!                             July 2012
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! Deville routine for NGLL == 5 (default)
-
-  subroutine compute_forces_viscoelastic_Dev_5p( iphase ,NSPEC_AB,NGLOB_AB, &
-                                    displ,veloc,accel, &
-                                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                                    hprime_xx,hprime_xxT, &
-                                    hprimewgll_xx,hprimewgll_xxT, &
-                                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                                    kappastore,mustore,jacobian,ibool, &
-                                    ATTENUATION,deltat, &
-                                    one_minus_sum_beta,factor_common,&
-                                    one_minus_sum_beta_kappa,factor_common_kappa,& !ZN
-                                    alphaval,betaval,gammaval,&      !ZN
-                                    NSPEC_ATTENUATION_AB,NSPEC_ATTENUATION_AB_Kappa, &
-!ZN                                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                                    R_trace,R_xx,R_yy,R_xy,R_xz,R_yz, &  !ZN
-!ZN                                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                                    epsilondev_trace,epsilondev_xx,epsilondev_yy,epsilondev_xy, &  !ZN
-                                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                                    ANISOTROPY,NSPEC_ANISO, &
-                                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                                    c55store,c56store,c66store, &
-                                    SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
-                                    is_moho_top,is_moho_bot, &
-                                    dsdx_top,dsdx_bot, &
-                                    ispec2D_moho_top,ispec2D_moho_bot, &
-                                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                                    phase_ispec_inner_elastic)
-
-
-! computes elastic tensor term
-
-  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
-                      N_SLS,SAVE_MOHO_MESH, &
-                      ONE_THIRD,FOUR_THIRDS,m1,m2,FULL_ATTENUATION_SOLID,CONST_Q_KAPPA,IOUT
-  use fault_solver_dynamic, only : Kelvin_Voigt_eta
-
-  implicit none
-
-  integer :: NSPEC_AB,NGLOB_AB
-
-! displacement, velocity and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,veloc,accel
-
-! time step
-  real(kind=CUSTOM_REAL) :: deltat
-
-! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        kappastore,mustore,jacobian
-
-! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,5) :: hprime_xx,hprimewgll_xxT
-  real(kind=CUSTOM_REAL), dimension(5,NGLLX) :: hprime_xxT,hprimewgll_xx
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! memory variables and standard linear solids for attenuation
-  logical :: ATTENUATION
-  logical :: COMPUTE_AND_STORE_STRAIN
-  integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
-  integer :: NSPEC_ATTENUATION_AB,NSPEC_ATTENUATION_AB_Kappa
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
-  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB_Kappa) :: one_minus_sum_beta_kappa   !ZN
-  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB_Kappa) :: factor_common_kappa  !ZN
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
-      R_xx,R_yy,R_xy,R_xz,R_yz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB_Kappa,N_SLS) :: R_trace  !ZN
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
-       epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB_Kappa) :: epsilondev_trace  !ZN
-  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
-
-! anisotropy
-  logical :: ANISOTROPY
-  integer :: NSPEC_ANISO
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
-            c11store,c12store,c13store,c14store,c15store,c16store, &
-            c22store,c23store,c24store,c25store,c26store,c33store, &
-            c34store,c35store,c36store,c44store,c45store,c46store, &
-            c55store,c56store,c66store
-
-  integer :: iphase
-  integer :: num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic
-  integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
-
-! adjoint simulations
-  integer :: SIMULATION_TYPE
-  integer :: NSPEC_BOUN,NSPEC2D_MOHO
-
-  ! moho kernel
-  real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
-    dsdx_top,dsdx_bot
-  logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
-  integer :: ispec2D_moho_top, ispec2D_moho_bot
-
-! local parameters
-  real(kind=CUSTOM_REAL), dimension(5,5,5) :: dummyx_loc,dummyy_loc,dummyz_loc, &
-    newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(5,5,5) :: &
-    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-
-  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
-  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
-
-  ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(5,25) :: B1_m1_m2_5points,B2_m1_m2_5points,B3_m1_m2_5points
-  real(kind=CUSTOM_REAL), dimension(5,25) :: C1_m1_m2_5points,C2_m1_m2_5points,C3_m1_m2_5points
-  real(kind=CUSTOM_REAL), dimension(5,25) :: E1_m1_m2_5points,E2_m1_m2_5points,E3_m1_m2_5points
-
-  equivalence(dummyx_loc,B1_m1_m2_5points)
-  equivalence(dummyy_loc,B2_m1_m2_5points)
-  equivalence(dummyz_loc,B3_m1_m2_5points)
-  equivalence(tempx1,C1_m1_m2_5points)
-  equivalence(tempy1,C2_m1_m2_5points)
-  equivalence(tempz1,C3_m1_m2_5points)
-  equivalence(newtempx1,E1_m1_m2_5points)
-  equivalence(newtempy1,E2_m1_m2_5points)
-  equivalence(newtempz1,E3_m1_m2_5points)
-
-  real(kind=CUSTOM_REAL), dimension(5,5,5) :: &
-    tempx1_att,tempx2_att,tempx3_att,tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
-  real(kind=CUSTOM_REAL), dimension(5,5,5) :: dummyx_loc_att,dummyy_loc_att,dummyz_loc_att
-  real(kind=CUSTOM_REAL), dimension(5,25) :: B1_m1_m2_5points_att,B2_m1_m2_5points_att,B3_m1_m2_5points_att
-  real(kind=CUSTOM_REAL), dimension(5,25) :: C1_m1_m2_5points_att,C2_m1_m2_5points_att,C3_m1_m2_5points_att
-
-  equivalence(dummyx_loc_att,B1_m1_m2_5points_att)
-  equivalence(dummyy_loc_att,B2_m1_m2_5points_att)
-  equivalence(dummyz_loc_att,B3_m1_m2_5points_att)
-  equivalence(tempx1_att,C1_m1_m2_5points_att)
-  equivalence(tempy1_att,C2_m1_m2_5points_att)
-  equivalence(tempz1_att,C3_m1_m2_5points_att)
-
-  real(kind=CUSTOM_REAL), dimension(25,5) :: &
-    A1_mxm_m2_m1_5points,A2_mxm_m2_m1_5points,A3_mxm_m2_m1_5points
-  real(kind=CUSTOM_REAL), dimension(25,5) :: &
-    C1_mxm_m2_m1_5points,C2_mxm_m2_m1_5points,C3_mxm_m2_m1_5points
-  real(kind=CUSTOM_REAL), dimension(25,5) :: &
-    E1_mxm_m2_m1_5points,E2_mxm_m2_m1_5points,E3_mxm_m2_m1_5points
-
-  equivalence(dummyx_loc,A1_mxm_m2_m1_5points)
-  equivalence(dummyy_loc,A2_mxm_m2_m1_5points)
-  equivalence(dummyz_loc,A3_mxm_m2_m1_5points)
-  equivalence(tempx3,C1_mxm_m2_m1_5points)
-  equivalence(tempy3,C2_mxm_m2_m1_5points)
-  equivalence(tempz3,C3_mxm_m2_m1_5points)
-  equivalence(newtempx3,E1_mxm_m2_m1_5points)
-  equivalence(newtempy3,E2_mxm_m2_m1_5points)
-  equivalence(newtempz3,E3_mxm_m2_m1_5points)
-
-  real(kind=CUSTOM_REAL), dimension(25,5) :: &
-    A1_mxm_m2_m1_5points_att,A2_mxm_m2_m1_5points_att,A3_mxm_m2_m1_5points_att
-  real(kind=CUSTOM_REAL), dimension(25,5) :: &
-    C1_mxm_m2_m1_5points_att,C2_mxm_m2_m1_5points_att,C3_mxm_m2_m1_5points_att
-
-  equivalence(dummyx_loc_att,A1_mxm_m2_m1_5points_att)
-  equivalence(dummyy_loc_att,A2_mxm_m2_m1_5points_att)
-  equivalence(dummyz_loc_att,A3_mxm_m2_m1_5points_att)
-  equivalence(tempx3_att,C1_mxm_m2_m1_5points_att)
-  equivalence(tempy3_att,C2_mxm_m2_m1_5points_att)
-  equivalence(tempz3_att,C3_mxm_m2_m1_5points_att)
-
-  ! local attenuation parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_trace_loc,epsilondev_xx_loc, & !ZN
-       epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
-  real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3, & !ZN
-                         R_trace_val1,R_trace_val2,R_trace_val3 !ZN
-  real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
-  real(kind=CUSTOM_REAL) Sn,Snp1
-  real(kind=CUSTOM_REAL) templ
-
-  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
-
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-
-  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
-
-  real(kind=CUSTOM_REAL) fac1,fac2,fac3
-
-  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
-  real(kind=CUSTOM_REAL) kappal
-
-  ! local anisotropy parameters
-  real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
-                        c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
-
-  integer i_SLS,imodulo_N_SLS
-  integer ispec,iglob,ispec_p,num_elements
-  integer i,j,k
-
-  real(kind=CUSTOM_REAL) :: eta
-
-  imodulo_N_SLS = mod(N_SLS,3)
-
-  ! choses inner/outer elements
-  if( iphase == 1 ) then
-    num_elements = nspec_outer_elastic
-  else
-    num_elements = nspec_inner_elastic
-  endif
-
-  do ispec_p = 1,num_elements
-
-        ! returns element id from stored element list
-        ispec = phase_ispec_inner_elastic(ispec_p,iphase)
-
-        ! adjoint simulations: moho kernel
-        if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
-          if (is_moho_top(ispec)) then
-            ispec2D_moho_top = ispec2D_moho_top + 1
-          else if (is_moho_bot(ispec)) then
-            ispec2D_moho_bot = ispec2D_moho_bot + 1
-          endif
-        endif ! adjoint
-
-       ! Kelvin Voigt damping: artificial viscosity around dynamic faults
-
-        ! stores displacment values in local array
-        if (allocated(Kelvin_Voigt_eta)) then
-          eta = Kelvin_Voigt_eta(ispec)
-          do k=1,NGLLZ
-            do j=1,NGLLY
-              do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc(i,j,k) = displ(1,iglob) + eta*veloc(1,iglob)
-                dummyy_loc(i,j,k) = displ(2,iglob) + eta*veloc(2,iglob)
-                dummyz_loc(i,j,k) = displ(3,iglob) + eta*veloc(3,iglob)
-              enddo
-            enddo
-          enddo
-
-        else
-          do k=1,NGLLZ
-            do j=1,NGLLY
-              do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc(i,j,k) = displ(1,iglob)
-                dummyy_loc(i,j,k) = displ(2,iglob)
-                dummyz_loc(i,j,k) = displ(3,iglob)
-              enddo
-            enddo
-          enddo
-        endif
-
-        ! use first order Taylor expansion of displacement for local storage of stresses
-        ! at this current time step, to fix attenuation in a consistent way
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           do k=1,NGLLZ
-              do j=1,NGLLY
-                 do i=1,NGLLX
-                    iglob = ibool(i,j,k,ispec)
-                    dummyx_loc_att(i,j,k) = deltat*veloc(1,iglob)
-                    dummyy_loc_att(i,j,k) = deltat*veloc(2,iglob)
-                    dummyz_loc_att(i,j,k) = deltat*veloc(3,iglob)
-                 enddo
-              enddo
-           enddo
-        endif
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
-        do j=1,m2
-          do i=1,m1
-            C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
-                                  hprime_xx(i,2)*B1_m1_m2_5points(2,j) + &
-                                  hprime_xx(i,3)*B1_m1_m2_5points(3,j) + &
-                                  hprime_xx(i,4)*B1_m1_m2_5points(4,j) + &
-                                  hprime_xx(i,5)*B1_m1_m2_5points(5,j)
-            C2_m1_m2_5points(i,j) = hprime_xx(i,1)*B2_m1_m2_5points(1,j) + &
-                                  hprime_xx(i,2)*B2_m1_m2_5points(2,j) + &
-                                  hprime_xx(i,3)*B2_m1_m2_5points(3,j) + &
-                                  hprime_xx(i,4)*B2_m1_m2_5points(4,j) + &
-                                  hprime_xx(i,5)*B2_m1_m2_5points(5,j)
-            C3_m1_m2_5points(i,j) = hprime_xx(i,1)*B3_m1_m2_5points(1,j) + &
-                                  hprime_xx(i,2)*B3_m1_m2_5points(2,j) + &
-                                  hprime_xx(i,3)*B3_m1_m2_5points(3,j) + &
-                                  hprime_xx(i,4)*B3_m1_m2_5points(4,j) + &
-                                  hprime_xx(i,5)*B3_m1_m2_5points(5,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m2
-              do i=1,m1
-                 C1_m1_m2_5points_att(i,j) = C1_m1_m2_5points(i,j) + &
-                      hprime_xx(i,1)*B1_m1_m2_5points_att(1,j) + &
-                      hprime_xx(i,2)*B1_m1_m2_5points_att(2,j) + &
-                      hprime_xx(i,3)*B1_m1_m2_5points_att(3,j) + &
-                      hprime_xx(i,4)*B1_m1_m2_5points_att(4,j) + &
-                      hprime_xx(i,5)*B1_m1_m2_5points_att(5,j)
-
-                 C2_m1_m2_5points_att(i,j) = C2_m1_m2_5points(i,j) + &
-                      hprime_xx(i,1)*B2_m1_m2_5points_att(1,j) + &
-                      hprime_xx(i,2)*B2_m1_m2_5points_att(2,j) + &
-                      hprime_xx(i,3)*B2_m1_m2_5points_att(3,j) + &
-                      hprime_xx(i,4)*B2_m1_m2_5points_att(4,j) + &
-                      hprime_xx(i,5)*B2_m1_m2_5points_att(5,j)
-
-                 C3_m1_m2_5points_att(i,j) = C3_m1_m2_5points(i,j) + &
-                      hprime_xx(i,1)*B3_m1_m2_5points_att(1,j) + &
-                      hprime_xx(i,2)*B3_m1_m2_5points_att(2,j) + &
-                      hprime_xx(i,3)*B3_m1_m2_5points_att(3,j) + &
-                      hprime_xx(i,4)*B3_m1_m2_5points_att(4,j) + &
-                      hprime_xx(i,5)*B3_m1_m2_5points_att(5,j)
-              enddo
-           enddo
-        endif
-
-        !   call mxm_m1_m1_5points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
-        !          hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
-        do j=1,m1
-          do i=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyx_loc(i,5,k)*hprime_xxT(5,j)
-              tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyy_loc(i,5,k)*hprime_xxT(5,j)
-              tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyz_loc(i,5,k)*hprime_xxT(5,j)
-            enddo
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m1
-                 ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-                 do k = 1,NGLLX
-                    tempx2_att(i,j,k) = tempx2(i,j,k) + &
-                         dummyx_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyx_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyx_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyx_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyx_loc_att(i,5,k)*hprime_xxT(5,j)
-
-                    tempy2_att(i,j,k) = tempy2(i,j,k) + &
-                         dummyy_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyy_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyy_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyy_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyy_loc_att(i,5,k)*hprime_xxT(5,j)
-
-                    tempz2_att(i,j,k) = tempz2(i,j,k) + &
-                         dummyz_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyz_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyz_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyz_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyz_loc_att(i,5,k)*hprime_xxT(5,j)
-                 enddo
-              enddo
-           enddo
-        endif
-
-        ! call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
-        do j=1,m1
-          do i=1,m2
-            C1_mxm_m2_m1_5points(i,j) = A1_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
-                                      A1_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
-                                      A1_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
-                                      A1_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
-                                      A1_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-            C2_mxm_m2_m1_5points(i,j) = A2_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
-                                      A2_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
-                                      A2_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
-                                      A2_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
-                                      A2_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-            C3_mxm_m2_m1_5points(i,j) = A3_mxm_m2_m1_5points(i,1)*hprime_xxT(1,j) + &
-                                      A3_mxm_m2_m1_5points(i,2)*hprime_xxT(2,j) + &
-                                      A3_mxm_m2_m1_5points(i,3)*hprime_xxT(3,j) + &
-                                      A3_mxm_m2_m1_5points(i,4)*hprime_xxT(4,j) + &
-                                      A3_mxm_m2_m1_5points(i,5)*hprime_xxT(5,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m2
-                 C1_mxm_m2_m1_5points_att(i,j) = C1_mxm_m2_m1_5points(i,j) + &
-                      A1_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
-                      A1_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
-                      A1_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
-                      A1_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
-                      A1_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
-
-                 C2_mxm_m2_m1_5points_att(i,j) = C2_mxm_m2_m1_5points(i,j) + &
-                      A2_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
-                      A2_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
-                      A2_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
-                      A2_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
-                      A2_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
-
-                 C3_mxm_m2_m1_5points_att(i,j) = C3_mxm_m2_m1_5points(i,j) + &
-                      A3_mxm_m2_m1_5points_att(i,1)*hprime_xxT(1,j) + &
-                      A3_mxm_m2_m1_5points_att(i,2)*hprime_xxT(2,j) + &
-                      A3_mxm_m2_m1_5points_att(i,3)*hprime_xxT(3,j) + &
-                      A3_mxm_m2_m1_5points_att(i,4)*hprime_xxT(4,j) + &
-                      A3_mxm_m2_m1_5points_att(i,5)*hprime_xxT(5,j)
-              enddo
-           enddo
-        endif
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-              ! get derivatives of ux, uy and uz with respect to x, y and z
-              xixl = xix(i,j,k,ispec)
-              xiyl = xiy(i,j,k,ispec)
-              xizl = xiz(i,j,k,ispec)
-              etaxl = etax(i,j,k,ispec)
-              etayl = etay(i,j,k,ispec)
-              etazl = etaz(i,j,k,ispec)
-              gammaxl = gammax(i,j,k,ispec)
-              gammayl = gammay(i,j,k,ispec)
-              gammazl = gammaz(i,j,k,ispec)
-              jacobianl = jacobian(i,j,k,ispec)
-
-              duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
-              duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
-              duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
-              duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
-              duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
-              duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
-              duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
-              duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
-              duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
-              ! save strain on the Moho boundary
-              if (SAVE_MOHO_MESH ) then
-                if (is_moho_top(ispec)) then
-                  dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
-                  dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
-                  dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
-                  dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
-                  dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
-                  dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
-                  dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
-                  dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
-                  dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
-                else if (is_moho_bot(ispec)) then
-                  dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
-                  dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
-                  dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
-                  dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
-                  dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
-                  dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
-                  dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
-                  dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
-                  dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
-                endif
-              endif
-
-              ! precompute some sums to save CPU time
-              duxdxl_plus_duydyl = duxdxl + duydyl
-              duxdxl_plus_duzdzl = duxdxl + duzdzl
-              duydyl_plus_duzdzl = duydyl + duzdzl
-              duxdyl_plus_duydxl = duxdyl + duydxl
-              duzdxl_plus_duxdzl = duzdxl + duxdzl
-              duzdyl_plus_duydzl = duzdyl + duydzl
-
-              if ( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
-                 ! temporary variables used for fixing attenuation in a consistent way
-                 duxdxl_att = xixl*tempx1_att(i,j,k) + etaxl*tempx2_att(i,j,k) + gammaxl*tempx3_att(i,j,k)
-                 duxdyl_att = xiyl*tempx1_att(i,j,k) + etayl*tempx2_att(i,j,k) + gammayl*tempx3_att(i,j,k)
-                 duxdzl_att = xizl*tempx1_att(i,j,k) + etazl*tempx2_att(i,j,k) + gammazl*tempx3_att(i,j,k)
-
-                 duydxl_att = xixl*tempy1_att(i,j,k) + etaxl*tempy2_att(i,j,k) + gammaxl*tempy3_att(i,j,k)
-                 duydyl_att = xiyl*tempy1_att(i,j,k) + etayl*tempy2_att(i,j,k) + gammayl*tempy3_att(i,j,k)
-                 duydzl_att = xizl*tempy1_att(i,j,k) + etazl*tempy2_att(i,j,k) + gammazl*tempy3_att(i,j,k)
-
-                 duzdxl_att = xixl*tempz1_att(i,j,k) + etaxl*tempz2_att(i,j,k) + gammaxl*tempz3_att(i,j,k)
-                 duzdyl_att = xiyl*tempz1_att(i,j,k) + etayl*tempz2_att(i,j,k) + gammayl*tempz3_att(i,j,k)
-                 duzdzl_att = xizl*tempz1_att(i,j,k) + etazl*tempz2_att(i,j,k) + gammazl*tempz3_att(i,j,k)
-
-                 ! precompute some sums to save CPU time
-                 duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
-                 duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
-                 duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
-
-                 ! compute deviatoric strain
-                 templ = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
-                 if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                 if(FULL_ATTENUATION_SOLID) epsilondev_trace_loc(i,j,k) = 3.0 * templ
-                 epsilondev_xx_loc(i,j,k) = duxdxl_att - templ
-                 epsilondev_yy_loc(i,j,k) = duydyl_att - templ
-                 epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
-                 epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
-                 epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
-              else
-                 ! computes deviatoric strain attenuation and/or for kernel calculations
-                 if (COMPUTE_AND_STORE_STRAIN) then
-                    templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-                    if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                    if(FULL_ATTENUATION_SOLID) epsilondev_trace_loc(i,j,k) = 3.0 * templ
-                    epsilondev_xx_loc(i,j,k) = duxdxl - templ
-                    epsilondev_yy_loc(i,j,k) = duydyl - templ
-                    epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
-                    epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
-                    epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
-                 endif
-              endif
-
-              kappal = kappastore(i,j,k,ispec)
-              mul = mustore(i,j,k,ispec)
-
-              ! attenuation
-              if(ATTENUATION) then
-                ! use unrelaxed parameters if attenuation
-                mul  = mul * one_minus_sum_beta(i,j,k,ispec)
-                if(FULL_ATTENUATION_SOLID) then  !ZN
-                   kappal  = kappal * one_minus_sum_beta_kappa(i,j,k,ispec)  !ZN
-                endif  !ZN
-              endif
-
-  ! full anisotropic case, stress calculations
-              if(ANISOTROPY) then
-                c11 = c11store(i,j,k,ispec)
-                c12 = c12store(i,j,k,ispec)
-                c13 = c13store(i,j,k,ispec)
-                c14 = c14store(i,j,k,ispec)
-                c15 = c15store(i,j,k,ispec)
-                c16 = c16store(i,j,k,ispec)
-                c22 = c22store(i,j,k,ispec)
-                c23 = c23store(i,j,k,ispec)
-                c24 = c24store(i,j,k,ispec)
-                c25 = c25store(i,j,k,ispec)
-                c26 = c26store(i,j,k,ispec)
-                c33 = c33store(i,j,k,ispec)
-                c34 = c34store(i,j,k,ispec)
-                c35 = c35store(i,j,k,ispec)
-                c36 = c36store(i,j,k,ispec)
-                c44 = c44store(i,j,k,ispec)
-                c45 = c45store(i,j,k,ispec)
-                c46 = c46store(i,j,k,ispec)
-                c55 = c55store(i,j,k,ispec)
-                c56 = c56store(i,j,k,ispec)
-                c66 = c66store(i,j,k,ispec)
-
-                sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
-                          c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
-                sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
-                          c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
-                sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
-                          c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
-                sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
-                          c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
-                sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
-                          c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
-                sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
-                          c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
-
-              else
-
-  ! isotropic case
-                lambdalplus2mul = kappal + FOUR_THIRDS * mul
-                lambdal = lambdalplus2mul - 2.*mul
-
-                ! compute stress sigma
-                sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
-                sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
-                sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
-
-                sigma_xy = mul*duxdyl_plus_duydxl
-                sigma_xz = mul*duzdxl_plus_duxdzl
-                sigma_yz = mul*duzdyl_plus_duydzl
-
-              endif ! ANISOTROPY
-
-              ! subtract memory variables if attenuation
-              if(ATTENUATION) then
-! way 1
-!                do i_sls = 1,N_SLS
-!                  R_xx_val = R_xx(i,j,k,ispec,i_sls)
-!                  R_yy_val = R_yy(i,j,k,ispec,i_sls)
-!                  sigma_xx = sigma_xx - R_xx_val
-!                  sigma_yy = sigma_yy - R_yy_val
-!                  sigma_zz = sigma_zz + R_xx_val + R_yy_val
-!                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-!                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-!                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-!                enddo
-
-! way 2
-! note: this should help compilers to pipeline the code and make better use of the cache;
-!          depending on compilers, it can further decrease the computation time by ~ 30%.
-!          by default, N_SLS = 3, therefore we take steps of 3
-              if(imodulo_N_SLS >= 1) then
-                do i_sls = 1,imodulo_N_SLS
-                  if(FULL_ATTENUATION_SOLID) then !! ZN: for performance, it would be better to avoid "if" statements inside loops
-                    R_trace_val1 = R_trace(i,j,k,ispec,i_sls)
-                  else
-                    R_trace_val1 = 0.
-                  endif
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1 - R_trace_val1
-                  sigma_yy = sigma_yy - R_yy_val1 - R_trace_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1 - R_trace_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-                enddo
-              endif
-
-              if(N_SLS >= imodulo_N_SLS+1) then
-                do i_sls = imodulo_N_SLS+1,N_SLS,3
-                  if(FULL_ATTENUATION_SOLID) then !! ZN: for performance, it would be better to avoid "if" statements inside loops
-                    R_trace_val1 = R_trace(i,j,k,ispec,i_sls)
-                  else
-                    R_trace_val1 = 0.
-                  endif
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1 - R_trace_val1
-                  sigma_yy = sigma_yy - R_yy_val1 - R_trace_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1 - R_trace_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-                  if(FULL_ATTENUATION_SOLID) then
-                    R_trace_val2 = R_trace(i,j,k,ispec,i_sls+1)
-                  else
-                    R_trace_val2 = 0.
-                  endif
-                  R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
-                  R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
-                  sigma_xx = sigma_xx - R_xx_val2 - R_trace_val2
-                  sigma_yy = sigma_yy - R_yy_val2 - R_trace_val2
-                  sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2 - R_trace_val2
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
-
-                  if(FULL_ATTENUATION_SOLID) then
-                    R_trace_val3 = R_trace(i,j,k,ispec,i_sls+2)
-                  else
-                    R_trace_val3 = 0.
-                  endif
-                  R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
-                  R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
-                  sigma_xx = sigma_xx - R_xx_val3 - R_trace_val3
-                  sigma_yy = sigma_yy - R_yy_val3 - R_trace_val3
-                  sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3 - R_trace_val3
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
-                enddo
-              endif
-
-
-              endif
-
-            ! define symmetric components of sigma
-            sigma_yx = sigma_xy
-            sigma_zx = sigma_xz
-            sigma_zy = sigma_yz
-
-            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
-            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
-            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
-            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
-            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
-            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
-            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
-            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
-            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
-            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
-            enddo
-          enddo
-        enddo
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_5points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
-        do j=1,m2
-          do i=1,m1
-            E1_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_5points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C1_m1_m2_5points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C1_m1_m2_5points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C1_m1_m2_5points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C1_m1_m2_5points(5,j)
-            E2_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_5points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C2_m1_m2_5points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C2_m1_m2_5points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C2_m1_m2_5points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C2_m1_m2_5points(5,j)
-            E3_m1_m2_5points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_5points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C3_m1_m2_5points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C3_m1_m2_5points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C3_m1_m2_5points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C3_m1_m2_5points(5,j)
-          enddo
-        enddo
-
-        !   call mxm_m1_m1_5points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
-        !         hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
-        do i=1,m1
-          do j=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempx2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempx2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempx2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempx2(i,5,k)*hprimewgll_xx(5,j)
-              newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempy2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempy2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempy2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempy2(i,5,k)*hprimewgll_xx(5,j)
-              newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempz2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempz2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempz2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempz2(i,5,k)*hprimewgll_xx(5,j)
-            enddo
-          enddo
-        enddo
-
-        ! call mxm_m2_m1_5points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
-        do j=1,m1
-          do i=1,m2
-            E1_mxm_m2_m1_5points(i,j) = C1_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
-                                      C1_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
-                                      C1_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
-                                      C1_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
-                                      C1_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
-            E2_mxm_m2_m1_5points(i,j) = C2_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
-                                      C2_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
-                                      C2_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
-                                      C2_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
-                                      C2_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
-            E3_mxm_m2_m1_5points(i,j) = C3_mxm_m2_m1_5points(i,1)*hprimewgll_xx(1,j) + &
-                                      C3_mxm_m2_m1_5points(i,2)*hprimewgll_xx(2,j) + &
-                                      C3_mxm_m2_m1_5points(i,3)*hprimewgll_xx(3,j) + &
-                                      C3_mxm_m2_m1_5points(i,4)*hprimewgll_xx(4,j) + &
-                                      C3_mxm_m2_m1_5points(i,5)*hprimewgll_xx(5,j)
-          enddo
-        enddo
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-
-              fac1 = wgllwgll_yz(j,k)
-              fac2 = wgllwgll_xz(i,k)
-              fac3 = wgllwgll_xy(i,j)
-
-              ! sum contributions from each element to the global mesh using indirect addressing
-              iglob = ibool(i,j,k,ispec)
-              accel(1,iglob) = accel(1,iglob) - fac1*newtempx1(i,j,k) - &
-                                fac2*newtempx2(i,j,k) - fac3*newtempx3(i,j,k)
-              accel(2,iglob) = accel(2,iglob) - fac1*newtempy1(i,j,k) - &
-                                fac2*newtempy2(i,j,k) - fac3*newtempy3(i,j,k)
-              accel(3,iglob) = accel(3,iglob) - fac1*newtempz1(i,j,k) - &
-                                fac2*newtempz2(i,j,k) - fac3*newtempz3(i,j,k)
-
-              !  update memory variables based upon the Runge-Kutta scheme
-              if(ATTENUATION) then
-
-                 ! use Runge-Kutta scheme to march in time
-                 do i_sls = 1,N_SLS
-
-                    alphaval_loc = alphaval(i_sls)
-                    betaval_loc = betaval(i_sls)
-                    gammaval_loc = gammaval(i_sls)
-
-                    if(FULL_ATTENUATION_SOLID) then
-                      ! term in trace  !ZN
-                      factor_loc = kappastore(i,j,k,ispec) * factor_common_kappa(i_sls,i,j,k,ispec)  !ZN
-
-                      Sn   = factor_loc * epsilondev_trace(i,j,k,ispec)  !ZN
-                      Snp1   = factor_loc * epsilondev_trace_loc(i,j,k)  !ZN
-                      R_trace(i,j,k,ispec,i_sls) = alphaval_loc * R_trace(i,j,k,ispec,i_sls) + &  !ZN
-                                        betaval_loc * Sn + gammaval_loc * Snp1  !ZN
-                    endif
-
-                    ! term in xx yy zz xy xz yz
-                    factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
-
-                    ! term in xx
-                    Sn   = factor_loc * epsilondev_xx(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xx_loc(i,j,k)
-                    R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yy
-                    Sn   = factor_loc * epsilondev_yy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yy_loc(i,j,k)
-                    R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in zz not computed since zero trace
-                    ! term in xy
-                    Sn   = factor_loc * epsilondev_xy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xy_loc(i,j,k)
-                    R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in xz
-                    Sn   = factor_loc * epsilondev_xz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xz_loc(i,j,k)
-                    R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yz
-                    Sn   = factor_loc * epsilondev_yz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yz_loc(i,j,k)
-                    R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                 enddo   ! end of loop on memory variables
-
-              endif  !  end attenuation
-
-            enddo
-          enddo
-        enddo
-
-        ! save deviatoric strain for Runge-Kutta scheme
-        if ( COMPUTE_AND_STORE_STRAIN ) then
-          if(FULL_ATTENUATION_SOLID) epsilondev_trace(:,:,:,ispec) = epsilondev_trace_loc(:,:,:)
-          epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
-          epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
-          epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
-          epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
-          epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
-        endif
-
-  enddo  ! spectral element loop
-
-  end subroutine compute_forces_viscoelastic_Dev_5p
-

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90	2013-03-20 02:13:06 UTC (rev 21588)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_viscoelastic_calling_routine.F90	2013-03-20 02:27:39 UTC (rev 21589)
@@ -335,12 +335,6 @@
 
     endif
 
-    !! DK DK May 2009: removed this because now each slice of a CUBIT + SCOTCH mesh
-    !! DK DK May 2009: has a different number of spectral elements and therefore
-    !! DK DK May 2009: only the general non-blocking MPI routines assemble_MPI_vector_ext_mesh_s
-    !! DK DK May 2009: and assemble_MPI_vector_ext_mesh_w above can be used.
-    !! DK DK May 2009: For adjoint runs below (SIMULATION_TYPE == 3) they should be used as well.
-
  enddo
 
 !Percy , Fault boundary term B*tau is added to the assembled forces

Copied: seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.F90 (from rev 21588, seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90)
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.F90	2013-03-20 02:27:39 UTC (rev 21589)
@@ -0,0 +1,823 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 1
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+!                             July 2012
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+!
+! United States and French Government Sponsorship Acknowledged.
+
+  subroutine iterate_time()
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  use specfem_par_movie
+
+  implicit none
+
+!
+!   s t a r t   t i m e   i t e r a t i o n s
+!
+
+! synchronize all processes to make sure everybody is ready to start time loop
+  call sync_all()
+  if(myrank == 0) write(IMAIN,*) 'All processes are synchronized before time loop'
+
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) 'Starting time iteration loop...'
+    write(IMAIN,*)
+  endif
+
+! create an empty file to monitor the start of the simulation
+  if(myrank == 0) then
+    open(unit=IOUT,file=trim(OUTPUT_FILES)//'/starttimeloop.txt',status='unknown')
+    write(IOUT,*) 'starting time loop'
+    close(IOUT)
+  endif
+
+! get MPI starting time
+  time_start = wtime()
+
+
+! *********************************************************
+! ************* MAIN LOOP OVER THE TIME STEPS *************
+! *********************************************************
+
+  do it = 1,NSTEP
+
+    ! simulation status output and stability check
+    if(mod(it,NTSTEP_BETWEEN_OUTPUT_INFO) == 0 .or. it == 5 .or. it == NSTEP) then
+      call it_check_stability()
+    endif
+
+    ! update displacement using Newmark time scheme
+    call it_update_displacement_scheme()
+
+    ! acoustic solver
+    ! (needs to be done first, before elastic one)
+    if( ACOUSTIC_SIMULATION ) call compute_forces_acoustic()
+    ! elastic solver
+    ! (needs to be done first, before poroelastic one)
+
+    if( ELASTIC_SIMULATION ) call compute_forces_viscoelastic()
+
+    ! poroelastic solver
+    if( POROELASTIC_SIMULATION ) call compute_forces_poroelastic()
+
+    ! restores last time snapshot saved for backward/reconstruction of wavefields
+    ! note: this must be read in after the Newmark time scheme
+    if( SIMULATION_TYPE == 3 .and. it == 1 ) then
+     call it_read_forward_arrays()
+    endif
+
+    ! write the seismograms with time shift (GPU_MODE transfer included)
+    if (nrec_local > 0 .or. ( WRITE_SEISMOGRAMS_BY_MASTER .and. myrank == 0 ) ) then
+      call write_seismograms()
+    endif
+
+    ! resetting d/v/a/R/eps for the backward reconstruction with attenuation
+    if (ATTENUATION ) then
+      call it_store_attenuation_arrays()
+    endif
+
+    ! adjoint simulations: kernels
+    if( SIMULATION_TYPE == 3 ) then
+      call compute_kernels()
+    endif
+
+    ! outputs movie files
+    if( MOVIE_SIMULATION ) then
+      call write_movie_output()
+    endif
+
+    ! first step of noise tomography, i.e., save a surface movie at every time step
+    if ( NOISE_TOMOGRAPHY == 1) then
+      if( num_free_surface_faces > 0) then
+        call noise_save_surface_movie(displ,ibool, &
+                            noise_surface_movie,it, &
+                            NSPEC_AB,NGLOB_AB, &
+                            num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
+                            Mesh_pointer,GPU_MODE)
+      endif
+    endif
+
+!
+!---- end of time iteration loop
+!
+  enddo   ! end of main time loop
+
+  call it_print_elapsed_time()
+
+  ! Transfer fields from GPU card to host for further analysis
+  if(GPU_MODE) call it_transfer_from_GPU()
+
+  end subroutine iterate_time
+
+
+!=====================================================================
+
+  subroutine it_check_stability()
+
+! computes the maximum of the norm of the displacement
+! in all the slices using an MPI reduction
+! and output timestamp file to check that simulation is running fine
+
+  use specfem_par
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  use specfem_par_acoustic
+  implicit none
+
+  double precision :: tCPU,t_remain,t_total
+  integer :: ihours,iminutes,iseconds,int_tCPU, &
+             ihours_remain,iminutes_remain,iseconds_remain,int_t_remain, &
+             ihours_total,iminutes_total,iseconds_total,int_t_total
+
+  ! maximum of the norm of the displacement
+  real(kind=CUSTOM_REAL) Usolidnorm,Usolidnorm_all ! elastic
+  real(kind=CUSTOM_REAL) Usolidnormp,Usolidnormp_all ! acoustic
+  real(kind=CUSTOM_REAL) Usolidnorms,Usolidnorms_all ! solid poroelastic
+  real(kind=CUSTOM_REAL) Usolidnormw,Usolidnormw_all ! fluid (w.r.t.s) poroelastic
+
+  ! norm of the backward displacement
+  real(kind=CUSTOM_REAL) b_Usolidnorm, b_Usolidnorm_all
+
+  ! initializes
+  Usolidnorm_all = 0.0_CUSTOM_REAL
+  Usolidnormp_all = 0.0_CUSTOM_REAL
+  Usolidnorms_all = 0.0_CUSTOM_REAL
+  Usolidnormw_all = 0.0_CUSTOM_REAL
+
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+!chris: Rewrite to get norm for each material when coupled simulations
+!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
+
+! compute maximum of norm of displacement in each slice
+  if( ELASTIC_SIMULATION ) then
+    if( GPU_MODE) then
+      ! way 2: just get maximum of field from GPU
+      call get_norm_elastic_from_device(Usolidnorm,Mesh_pointer,1)
+    else
+      Usolidnorm = maxval(sqrt(displ(1,:)**2 + displ(2,:)**2 + displ(3,:)**2))
+    endif
+
+    ! check stability of the code, exit if unstable
+    ! negative values can occur with some compilers when the unstable value is greater
+    ! than the greatest possible floating-point number of the machine
+    !if(Usolidnorm > STABILITY_THRESHOLD .or. Usolidnorm < 0.0_CUSTOM_REAL) &
+    !  call exit_MPI(myrank,'single forward simulation became unstable and blew up')
+
+    ! compute the maximum of the maxima for all the slices using an MPI reduction
+    call max_all_cr(Usolidnorm,Usolidnorm_all)
+  endif
+
+  if( ACOUSTIC_SIMULATION ) then
+    if(GPU_MODE) then
+      ! way 2: just get maximum of field from GPU
+      call get_norm_acoustic_from_device(Usolidnormp,Mesh_pointer,1)
+    else
+      Usolidnormp = maxval(abs(potential_dot_dot_acoustic(:)))
+    endif
+
+    ! compute the maximum of the maxima for all the slices using an MPI reduction
+    call max_all_cr(Usolidnormp,Usolidnormp_all)
+  endif
+
+  if( POROELASTIC_SIMULATION ) then
+    Usolidnorms = maxval(sqrt(displs_poroelastic(1,:)**2 + displs_poroelastic(2,:)**2 + &
+                             displs_poroelastic(3,:)**2))
+    Usolidnormw = maxval(sqrt(displw_poroelastic(1,:)**2 + displw_poroelastic(2,:)**2 + &
+                             displw_poroelastic(3,:)**2))
+
+    ! compute the maximum of the maxima for all the slices using an MPI reduction
+    call max_all_cr(Usolidnorms,Usolidnorms_all)
+    call max_all_cr(Usolidnormw,Usolidnormw_all)
+  endif
+
+
+  ! adjoint simulations
+  if( SIMULATION_TYPE == 3 ) then
+    if( ELASTIC_SIMULATION ) then
+      ! way 2
+      if(GPU_MODE) then
+        call get_norm_elastic_from_device(b_Usolidnorm,Mesh_pointer,3)
+      else
+        b_Usolidnorm = maxval(sqrt(b_displ(1,:)**2 + b_displ(2,:)**2 + b_displ(3,:)**2))
+      endif
+    endif
+    if( ACOUSTIC_SIMULATION ) then
+        ! way 2
+        if(GPU_MODE) then
+          call get_norm_acoustic_from_device(b_Usolidnorm,Mesh_pointer,3)
+        else
+          b_Usolidnorm = maxval(abs(b_potential_dot_dot_acoustic(:)))
+        endif
+    endif
+    if( POROELASTIC_SIMULATION ) then
+        b_Usolidnorm = maxval(sqrt(b_displs_poroelastic(1,:)**2 + b_displs_poroelastic(2,:)**2 + &
+                                 b_displs_poroelastic(3,:)**2))
+    endif
+    ! check stability of the code, exit if unstable
+    ! negative values can occur with some compilers when the unstable value is greater
+    ! than the greatest possible floating-point number of the machine
+    !if(b_Usolidnorm > STABILITY_THRESHOLD .or. b_Usolidnorm < 0.0_CUSTOM_REAL) &
+    !  call exit_MPI(myrank,'single backward simulation became unstable and blew up')
+
+    ! compute max of all slices
+    call max_all_cr(b_Usolidnorm,b_Usolidnorm_all)
+  endif
+
+  ! user output
+  if(myrank == 0) then
+
+    write(IMAIN,*) 'Time step # ',it
+    write(IMAIN,*) 'Time: ',sngl((it-1)*DT-t0),' seconds'
+
+    ! elapsed time since beginning of the simulation
+    tCPU = wtime() - time_start
+    int_tCPU = int(tCPU)
+    ihours = int_tCPU / 3600
+    iminutes = (int_tCPU - 3600*ihours) / 60
+    iseconds = int_tCPU - 3600*ihours - 60*iminutes
+    write(IMAIN,*) 'Elapsed time in seconds = ',tCPU
+    write(IMAIN,"(' Elapsed time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") ihours,iminutes,iseconds
+    write(IMAIN,*) 'Mean elapsed time per time step in seconds = ',sngl(tCPU/dble(it))
+    if( ELASTIC_SIMULATION ) then
+      write(IMAIN,*) 'Max norm displacement vector U in all slices (m) = ',Usolidnorm_all
+    endif
+    if( ACOUSTIC_SIMULATION ) then
+        write(IMAIN,*) 'Max norm pressure P in all slices (Pa) = ',Usolidnormp_all
+    endif
+    if( POROELASTIC_SIMULATION ) then
+        write(IMAIN,*) 'Max norm displacement vector Us in all slices (m) = ',Usolidnorms_all
+        write(IMAIN,*) 'Max norm displacement vector W in all slices (m) = ',Usolidnormw_all
+    endif
+    ! adjoint simulations
+    if (SIMULATION_TYPE == 3) write(IMAIN,*) &
+           'Max norm U (backward) in all slices = ',b_Usolidnorm_all
+
+    ! compute estimated remaining simulation time
+    t_remain = (NSTEP - it) * (tCPU/dble(it))
+    int_t_remain = int(t_remain)
+    ihours_remain = int_t_remain / 3600
+    iminutes_remain = (int_t_remain - 3600*ihours_remain) / 60
+    iseconds_remain = int_t_remain - 3600*ihours_remain - 60*iminutes_remain
+    write(IMAIN,*) 'Time steps done = ',it,' out of ',NSTEP
+    write(IMAIN,*) 'Time steps remaining = ',NSTEP - it
+    write(IMAIN,*) 'Estimated remaining time in seconds = ',sngl(t_remain)
+    write(IMAIN,"(' Estimated remaining time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") &
+             ihours_remain,iminutes_remain,iseconds_remain
+
+    ! compute estimated total simulation time
+    t_total = t_remain + tCPU
+    int_t_total = int(t_total)
+    ihours_total = int_t_total / 3600
+    iminutes_total = (int_t_total - 3600*ihours_total) / 60
+    iseconds_total = int_t_total - 3600*ihours_total - 60*iminutes_total
+    write(IMAIN,*) 'Estimated total run time in seconds = ',sngl(t_total)
+    write(IMAIN,"(' Estimated total run time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") &
+             ihours_total,iminutes_total,iseconds_total
+    write(IMAIN,*) 'We have done ',sngl(100.d0*dble(it)/dble(NSTEP)),'% of that'
+
+    if(it < 100) then
+      write(IMAIN,*) '************************************************************'
+      write(IMAIN,*) '**** BEWARE: the above time estimates are not reliable'
+      write(IMAIN,*) '**** because fewer than 100 iterations have been performed'
+      write(IMAIN,*) '************************************************************'
+    endif
+    write(IMAIN,*)
+
+    ! flushes file buffer for main output file (IMAIN)
+    call flush_IMAIN()
+
+    ! write time stamp file to give information about progression of simulation
+    write(outputname,"('/timestamp',i6.6)") it
+    open(unit=IOUT,file=trim(OUTPUT_FILES)//outputname,status='unknown')
+    write(IOUT,*) 'Time step # ',it
+    write(IOUT,*) 'Time: ',sngl((it-1)*DT-t0),' seconds'
+    write(IOUT,*) 'Elapsed time in seconds = ',tCPU
+    write(IOUT,"(' Elapsed time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") ihours,iminutes,iseconds
+    write(IOUT,*) 'Mean elapsed time per time step in seconds = ',tCPU/dble(it)
+    if( ELASTIC_SIMULATION ) then
+      write(IOUT,*) 'Max norm displacement vector U in all slices (m) = ',Usolidnorm_all
+    endif
+    if( ACOUSTIC_SIMULATION ) then
+        write(IOUT,*) 'Max norm pressure P in all slices (Pa) = ',Usolidnormp_all
+    endif
+    if( POROELASTIC_SIMULATION ) then
+        write(IOUT,*) 'Max norm displacement vector Us in all slices (m) = ',Usolidnorms_all
+        write(IOUT,*) 'Max norm displacement vector W in all slices (m) = ',Usolidnormw_all
+    endif
+    ! adjoint simulations
+    if (SIMULATION_TYPE == 3) write(IOUT,*) &
+           'Max norm U (backward) in all slices = ',b_Usolidnorm_all
+    ! estimation
+    write(IOUT,*) 'Time steps done = ',it,' out of ',NSTEP
+    write(IOUT,*) 'Time steps remaining = ',NSTEP - it
+    write(IOUT,*) 'Estimated remaining time in seconds = ',t_remain
+    write(IOUT,"(' Estimated remaining time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") &
+             ihours_remain,iminutes_remain,iseconds_remain
+    write(IOUT,*) 'Estimated total run time in seconds = ',t_total
+    write(IOUT,"(' Estimated total run time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") &
+             ihours_total,iminutes_total,iseconds_total
+    write(IOUT,*) 'We have done ',sngl(100.d0*dble(it)/dble(NSTEP)),'% of that'
+    close(IOUT)
+
+    ! check stability of the code, exit if unstable
+    ! negative values can occur with some compilers when the unstable value is greater
+    ! than the greatest possible floating-point number of the machine
+    if(Usolidnorm_all > STABILITY_THRESHOLD .or. Usolidnorm_all < 0.0_CUSTOM_REAL &
+     .or. Usolidnormp_all > STABILITY_THRESHOLD .or. Usolidnormp_all < 0.0_CUSTOM_REAL &
+     .or. Usolidnorms_all > STABILITY_THRESHOLD .or. Usolidnorms_all < 0.0_CUSTOM_REAL &
+     .or. Usolidnormw_all > STABILITY_THRESHOLD .or. Usolidnormw_all < 0.0_CUSTOM_REAL) &
+        call exit_MPI(myrank,'forward simulation became unstable and blew up')
+    ! adjoint simulations
+    if(SIMULATION_TYPE == 3 .and. (b_Usolidnorm_all > STABILITY_THRESHOLD &
+      .or. b_Usolidnorm_all < 0.0)) &
+        call exit_MPI(myrank,'backward simulation became unstable and blew up')
+
+  endif ! myrank
+
+  end subroutine it_check_stability
+
+
+!=====================================================================
+
+  subroutine it_update_displacement_scheme()
+
+! explicit Newmark time scheme with acoustic & elastic domains:
+! (see e.g. Hughes, 1987; Chaljub et al., 2003)
+!
+! chi(t+delta_t) = chi(t) + delta_t chi_dot(t) + 1/2 delta_t**2 chi_dot_dot(t)
+! chi_dot(t+delta_t) = chi_dot(t) + 1/2 delta_t chi_dot_dot(t) + 1/2 delta_t chi_dot_dot(t+delta_t)
+! chi_dot_dot(t+delta_t) = 1/M_acoustic( -K_acoustic chi(t+delta) + B_acoustic u(t+delta_t) + f(t+delta_t) )
+!
+! u(t+delta_t) = u(t) + delta_t  v(t) + 1/2  delta_t**2 a(t)
+! v(t+delta_t) = v(t) + 1/2 delta_t a(t) + 1/2 delta_t a(t+delta_t)
+! a(t+delta_t) = 1/M_elastic ( -K_elastic u(t+delta) + B_elastic chi_dot_dot(t+delta_t) + f( t+delta_t) )
+!
+! where
+!   chi, chi_dot, chi_dot_dot are acoustic (fluid) potentials ( dotted with respect to time)
+!   u, v, a are displacement,velocity & acceleration
+!   M is mass matrix, K stiffness matrix and B boundary term for acoustic/elastic domains
+!   f denotes a source term (acoustic/elastic)
+!
+! note that this stage calculates the predictor terms
+!
+!   for
+!   potential chi_dot(t+delta) requires + 1/2 delta_t chi_dot_dot(t+delta_t)
+!                                   at a later stage (corrector) once where chi_dot_dot(t+delta) is calculated
+!   and similar,
+!   velocity v(t+delta_t) requires  + 1/2 delta_t a(t+delta_t)
+!                                   at a later stage once where a(t+delta) is calculated
+! also:
+!   boundary term B_elastic requires chi_dot_dot(t+delta)
+!                                   thus chi_dot_dot has to be updated first before the elastic boundary term is considered
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+
+  implicit none
+
+! updates acoustic potentials
+  if( ACOUSTIC_SIMULATION ) then
+
+    if(.NOT. GPU_MODE) then
+      ! on CPU
+      potential_acoustic(:) = potential_acoustic(:) &
+                            + deltat * potential_dot_acoustic(:) &
+                            + deltatsqover2 * potential_dot_dot_acoustic(:)
+      potential_dot_acoustic(:) = potential_dot_acoustic(:) &
+                                + deltatover2 * potential_dot_dot_acoustic(:)
+      potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
+    else
+      ! on GPU
+      call it_update_displacement_ac_cuda(Mesh_pointer, NGLOB_AB, &
+                                          deltat, deltatsqover2, deltatover2, &
+                                          b_deltat, b_deltatsqover2, b_deltatover2)
+    endif
+
+  endif ! ACOUSTIC_SIMULATION
+
+! updates elastic displacement and velocity
+  if( ELASTIC_SIMULATION ) then
+
+    if(.NOT. GPU_MODE) then
+      ! on CPU
+      displ(:,:) = displ(:,:) + deltat*veloc(:,:) + deltatsqover2*accel(:,:)
+      veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
+      if( SIMULATION_TYPE /= 1 ) accel_adj_coupling(:,:) = accel(:,:)
+      accel(:,:) = 0._CUSTOM_REAL
+    else
+      ! on GPU
+      ! Includes SIM_TYPE 1 & 3 (for noise tomography)
+      call it_update_displacement_cuda(Mesh_pointer, size(displ), deltat, deltatsqover2,&
+                                       deltatover2, b_deltat, b_deltatsqover2, b_deltatover2)
+    endif
+  endif
+
+! updates poroelastic displacements and velocities
+  if( POROELASTIC_SIMULATION ) then
+    ! solid phase
+    displs_poroelastic(:,:) = displs_poroelastic(:,:) + deltat*velocs_poroelastic(:,:) + &
+                              deltatsqover2*accels_poroelastic(:,:)
+    velocs_poroelastic(:,:) = velocs_poroelastic(:,:) + deltatover2*accels_poroelastic(:,:)
+    accels_poroelastic(:,:) = 0._CUSTOM_REAL
+
+    ! fluid phase
+    displw_poroelastic(:,:) = displw_poroelastic(:,:) + deltat*velocw_poroelastic(:,:) + &
+                              deltatsqover2*accelw_poroelastic(:,:)
+    velocw_poroelastic(:,:) = velocw_poroelastic(:,:) + deltatover2*accelw_poroelastic(:,:)
+    accelw_poroelastic(:,:) = 0._CUSTOM_REAL
+  endif
+
+! adjoint simulations
+  if (SIMULATION_TYPE == 3 .and. .NOT. GPU_MODE) then
+    ! acoustic backward fields
+    if( ACOUSTIC_SIMULATION ) then
+      b_potential_acoustic(:) = b_potential_acoustic(:) &
+                              + b_deltat * b_potential_dot_acoustic(:) &
+                              + b_deltatsqover2 * b_potential_dot_dot_acoustic(:)
+      b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) &
+                                  + b_deltatover2 * b_potential_dot_dot_acoustic(:)
+      b_potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
+    endif
+
+    ! elastic backward fields
+    if( ELASTIC_SIMULATION ) then
+      b_displ(:,:) = b_displ(:,:) + b_deltat*b_veloc(:,:) + b_deltatsqover2*b_accel(:,:)
+      b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
+      b_accel(:,:) = 0._CUSTOM_REAL
+    endif
+    ! poroelastic backward fields
+    if( POROELASTIC_SIMULATION ) then
+    ! solid phase
+    b_displs_poroelastic(:,:) = b_displs_poroelastic(:,:) + b_deltat*b_velocs_poroelastic(:,:) + &
+                              b_deltatsqover2*b_accels_poroelastic(:,:)
+    b_velocs_poroelastic(:,:) = b_velocs_poroelastic(:,:) + b_deltatover2*b_accels_poroelastic(:,:)
+    b_accels_poroelastic(:,:) = 0._CUSTOM_REAL
+
+    ! fluid phase
+    b_displw_poroelastic(:,:) = b_displw_poroelastic(:,:) + b_deltat*b_velocw_poroelastic(:,:) + &
+                              b_deltatsqover2*b_accelw_poroelastic(:,:)
+    b_velocw_poroelastic(:,:) = b_velocw_poroelastic(:,:) + b_deltatover2*b_accelw_poroelastic(:,:)
+    b_accelw_poroelastic(:,:) = 0._CUSTOM_REAL
+    endif
+  endif
+
+! adjoint simulations: moho kernel
+  if (SAVE_MOHO_MESH .and. SIMULATION_TYPE == 3) then
+    ispec2D_moho_top = 0
+    ispec2D_moho_bot = 0
+  endif
+
+
+  end subroutine it_update_displacement_scheme
+
+!=====================================================================
+
+  subroutine it_read_forward_arrays()
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  implicit none
+
+  integer :: ier
+
+! restores last time snapshot saved for backward/reconstruction of wavefields
+! note: this is done here after the Newmark time scheme, otherwise the indexing for sources
+!          and adjoint sources will become more complicated
+!          that is, index it for adjoint sources will match index NSTEP - 1 for backward/reconstructed wavefields
+
+  ! reads in wavefields
+  open(unit=27,file=trim(prname)//'save_forward_arrays.bin',status='old',&
+        action='read',form='unformatted',iostat=ier)
+  if( ier /= 0 ) then
+    print*,'error: opening save_forward_arrays'
+    print*,'path: ',trim(prname)//'save_forward_arrays.bin'
+    call exit_mpi(myrank,'error open file save_forward_arrays.bin')
+  endif
+
+  if( ACOUSTIC_SIMULATION ) then
+    read(27) b_potential_acoustic
+    read(27) b_potential_dot_acoustic
+    read(27) b_potential_dot_dot_acoustic
+
+    ! transfers fields onto GPU
+    if(GPU_MODE) &
+      call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
+                          b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+  endif
+
+  ! elastic wavefields
+  if( ELASTIC_SIMULATION ) then
+    read(27) b_displ
+    read(27) b_veloc
+    read(27) b_accel
+
+    ! puts elastic wavefield to GPU
+    if(GPU_MODE) &
+      call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
+
+    ! memory variables if attenuation
+    if( ATTENUATION ) then
+      if(FULL_ATTENUATION_SOLID) read(27) b_R_trace  !ZN
+      read(27) b_R_xx
+      read(27) b_R_yy
+      read(27) b_R_xy
+      read(27) b_R_xz
+      read(27) b_R_yz
+      if(FULL_ATTENUATION_SOLID) read(27) b_epsilondev_trace !ZN
+      read(27) b_epsilondev_xx
+      read(27) b_epsilondev_yy
+      read(27) b_epsilondev_xy
+      read(27) b_epsilondev_xz
+      read(27) b_epsilondev_yz
+
+      ! puts elastic attenuation arrays to GPU
+      if(GPU_MODE) &
+          call transfer_b_fields_att_to_device(Mesh_pointer, &
+                    b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &
+!ZN                 b_R_trace,b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &  ! please change the above line with this
+                    b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
+!ZN                 b_epsilondev_trace,b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
+!ZN                 ! please change the above line with this
+                    size(b_epsilondev_xx))
+    endif
+
+  endif
+
+  ! poroelastic wavefields
+  if( POROELASTIC_SIMULATION ) then
+    read(27) b_displs_poroelastic
+    read(27) b_velocs_poroelastic
+    read(27) b_accels_poroelastic
+    read(27) b_displw_poroelastic
+    read(27) b_velocw_poroelastic
+    read(27) b_accelw_poroelastic
+  endif
+
+  close(27)
+
+  end subroutine it_read_forward_arrays
+
+!=====================================================================
+
+  subroutine it_store_attenuation_arrays()
+
+! resetting d/v/a/R/eps for the backward reconstruction with attenuation
+
+  use specfem_par
+  use specfem_par_elastic
+  use specfem_par_acoustic
+
+  implicit none
+
+  if( it > 1 .and. it < NSTEP) then
+    ! adjoint simulations
+
+! note backward/reconstructed wavefields:
+!       storing wavefield displ() at time step it, corresponds to time (it-1)*DT - t0 (see routine write_seismograms_to_file )
+!       reconstucted wavefield b_displ() at it corresponds to time (NSTEP-it-1)*DT - t0
+!       we read in the reconstructed wavefield at the end of the time iteration loop, i.e. after the Newmark scheme,
+!       thus, indexing is NSTEP-it (rather than something like NSTEP-(it-1) )
+    if (SIMULATION_TYPE == 3 .and. mod(NSTEP-it,NSTEP_Q_SAVE) == 0) then
+      ! reads files content
+      write(outputname,"('save_Q_arrays_',i6.6,'.bin')") NSTEP-it
+      open(unit=27,file=trim(prname_Q)//trim(outputname),status='old',&
+            action='read',form='unformatted')
+      if( ELASTIC_SIMULATION ) then
+        ! reads arrays from disk files
+        read(27) b_displ
+        read(27) b_veloc
+        read(27) b_accel
+
+        ! puts elastic fields onto GPU
+        if(GPU_MODE) then
+          ! wavefields
+          call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
+        endif
+        if(FULL_ATTENUATION_SOLID) read(27) b_R_trace !ZN
+        read(27) b_R_xx
+        read(27) b_R_yy
+        read(27) b_R_xy
+        read(27) b_R_xz
+        read(27) b_R_yz
+        if(FULL_ATTENUATION_SOLID) read(27) b_epsilondev_trace !ZN
+        read(27) b_epsilondev_xx
+        read(27) b_epsilondev_yy
+        read(27) b_epsilondev_xy
+        read(27) b_epsilondev_xz
+        read(27) b_epsilondev_yz
+
+        ! puts elastic fields onto GPU
+        if(GPU_MODE) then
+          ! attenuation arrays
+          call transfer_b_fields_att_to_device(Mesh_pointer, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &
+!ZN               b_R_trace,b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
+!ZN               b_epsilondev_trace,b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
+                  size(b_epsilondev_xx))
+        endif
+      endif
+
+      if( ACOUSTIC_SIMULATION ) then
+        ! reads arrays from disk files
+        read(27) b_potential_acoustic
+        read(27) b_potential_dot_acoustic
+        read(27) b_potential_dot_dot_acoustic
+
+        ! puts acoustic fields onto GPU
+        if(GPU_MODE) &
+          call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
+                              b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+
+      endif
+      close(27)
+    else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. mod(it,NSTEP_Q_SAVE) == 0) then
+      ! stores files content
+      write(outputname,"('save_Q_arrays_',i6.6,'.bin')") it
+      open(unit=27,file=trim(prname_Q)//trim(outputname),status='unknown',&
+           action='write',form='unformatted')
+      if( ELASTIC_SIMULATION ) then
+        ! gets elastic fields from GPU onto CPU
+        if(GPU_MODE) then
+          call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+        endif
+
+        ! writes to disk file
+        write(27) displ
+        write(27) veloc
+        write(27) accel
+
+        if(GPU_MODE) then
+          ! attenuation arrays
+          call transfer_fields_att_from_device(Mesh_pointer, &
+                     R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
+!ZN                  R_trace,R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
+                     epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
+!ZN                  epsilondev_trace,epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
+                     size(epsilondev_xx))
+        endif
+
+        if(FULL_ATTENUATION_SOLID) write(27) R_trace  !ZN
+        write(27) R_xx
+        write(27) R_yy
+        write(27) R_xy
+        write(27) R_xz
+        write(27) R_yz
+        if(FULL_ATTENUATION_SOLID) write(27) epsilondev_trace !ZN
+        write(27) epsilondev_xx
+        write(27) epsilondev_yy
+        write(27) epsilondev_xy
+        write(27) epsilondev_xz
+        write(27) epsilondev_yz
+      endif
+      if( ACOUSTIC_SIMULATION ) then
+       ! gets acoustic fields from GPU onto CPU
+        if(GPU_MODE) &
+          call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
+        ! writes to disk file
+        write(27) potential_acoustic
+        write(27) potential_dot_acoustic
+        write(27) potential_dot_dot_acoustic
+      endif
+      close(27)
+    endif ! SIMULATION_TYPE
+  endif ! it
+
+  end subroutine it_store_attenuation_arrays
+
+!=====================================================================
+
+  subroutine it_print_elapsed_time()
+
+    use specfem_par
+    use specfem_par_elastic
+    use specfem_par_acoustic
+    implicit none
+
+    ! local parameters
+    double precision :: tCPU
+    integer :: ihours,iminutes,iseconds,int_tCPU
+
+    if(myrank == 0) then
+       ! elapsed time since beginning of the simulation
+       tCPU = wtime() - time_start
+       int_tCPU = int(tCPU)
+       ihours = int_tCPU / 3600
+       iminutes = (int_tCPU - 3600*ihours) / 60
+       iseconds = int_tCPU - 3600*ihours - 60*iminutes
+       write(IMAIN,*) 'Time-Loop Complete. Timing info:'
+       write(IMAIN,*) 'Total elapsed time in seconds = ',tCPU
+       write(IMAIN,"(' Total elapsed time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") ihours,iminutes,iseconds
+    endif
+
+  end subroutine it_print_elapsed_time
+
+!=====================================================================
+
+  subroutine it_transfer_from_GPU()
+
+! transfers fields on GPU back onto CPU
+
+  use specfem_par
+  use specfem_par_elastic
+  use specfem_par_acoustic
+
+  implicit none
+
+  ! to store forward wave fields
+  if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+
+    ! acoustic potentials
+    if( ACOUSTIC_SIMULATION ) &
+      call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
+                            potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
+
+    ! elastic wavefield
+    if( ELASTIC_SIMULATION ) then
+      call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
+
+      if (ATTENUATION) &
+        call transfer_fields_att_from_device(Mesh_pointer, &
+                    R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
+!ZN                 R_trace,R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
+                    epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
+!ZN                 epsilondev_trace,epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
+                    size(epsilondev_xx))
+
+    endif
+  else if (SIMULATION_TYPE == 3) then
+
+    ! to store kernels
+    ! acoustic domains
+    if( ACOUSTIC_SIMULATION ) then
+      ! only in case needed...
+      !call transfer_b_fields_ac_from_device(NGLOB_AB,b_potential_acoustic, &
+      !                      b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
+
+      ! acoustic kernels
+      call transfer_kernels_ac_to_host(Mesh_pointer,rho_ac_kl,kappa_ac_kl,NSPEC_AB)
+    endif
+
+    ! elastic domains
+    if( ELASTIC_SIMULATION ) then
+      ! only in case needed...
+      !call transfer_b_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
+
+      ! elastic kernels
+      call transfer_kernels_el_to_host(Mesh_pointer,rho_kl,mu_kl,kappa_kl,NSPEC_AB)
+    endif
+
+    ! specific noise strength kernel
+    if( NOISE_TOMOGRAPHY == 3 ) then
+      call transfer_kernels_noise_to_host(Mesh_pointer,Sigma_kl,NSPEC_AB)
+    endif
+
+    ! approximative hessian for preconditioning kernels
+    if ( APPROXIMATE_HESS_KL ) then
+      if( ELASTIC_SIMULATION ) call transfer_kernels_hess_el_tohost(Mesh_pointer,hess_kl,NSPEC_AB)
+      if( ACOUSTIC_SIMULATION ) call transfer_kernels_hess_ac_tohost(Mesh_pointer,hess_ac_kl,NSPEC_AB)
+    endif
+
+  endif
+
+  ! frees allocated memory on GPU
+  call prepare_cleanup_device(Mesh_pointer, &
+                              SAVE_FORWARD, &
+                              ACOUSTIC_SIMULATION,ELASTIC_SIMULATION, &
+                              ABSORBING_CONDITIONS,NOISE_TOMOGRAPHY,COMPUTE_AND_STORE_STRAIN, &
+                              ATTENUATION,ANISOTROPY,APPROXIMATE_OCEAN_LOAD, &
+                              APPROXIMATE_HESS_KL)
+
+  end subroutine it_transfer_from_GPU

Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90	2013-03-20 02:13:06 UTC (rev 21588)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/iterate_time.f90	2013-03-20 02:27:39 UTC (rev 21589)
@@ -1,823 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 1
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-!                             July 2012
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-!
-! United States and French Government Sponsorship Acknowledged.
-
-  subroutine iterate_time()
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  use specfem_par_movie
-
-  implicit none
-
-!
-!   s t a r t   t i m e   i t e r a t i o n s
-!
-
-! synchronize all processes to make sure everybody is ready to start time loop
-  call sync_all()
-  if(myrank == 0) write(IMAIN,*) 'All processes are synchronized before time loop'
-
-  if(myrank == 0) then
-    write(IMAIN,*)
-    write(IMAIN,*) 'Starting time iteration loop...'
-    write(IMAIN,*)
-  endif
-
-! create an empty file to monitor the start of the simulation
-  if(myrank == 0) then
-    open(unit=IOUT,file=trim(OUTPUT_FILES)//'/starttimeloop.txt',status='unknown')
-    write(IOUT,*) 'starting time loop'
-    close(IOUT)
-  endif
-
-! get MPI starting time
-  time_start = wtime()
-
-
-! *********************************************************
-! ************* MAIN LOOP OVER THE TIME STEPS *************
-! *********************************************************
-
-  do it = 1,NSTEP
-
-    ! simulation status output and stability check
-    if(mod(it,NTSTEP_BETWEEN_OUTPUT_INFO) == 0 .or. it == 5 .or. it == NSTEP) then
-      call it_check_stability()
-    endif
-
-    ! update displacement using Newmark time scheme
-    call it_update_displacement_scheme()
-
-    ! acoustic solver
-    ! (needs to be done first, before elastic one)
-    if( ACOUSTIC_SIMULATION ) call compute_forces_acoustic()
-    ! elastic solver
-    ! (needs to be done first, before poroelastic one)
-
-    if( ELASTIC_SIMULATION ) call compute_forces_viscoelastic()
-
-    ! poroelastic solver
-    if( POROELASTIC_SIMULATION ) call compute_forces_poroelastic()
-
-    ! restores last time snapshot saved for backward/reconstruction of wavefields
-    ! note: this must be read in after the Newmark time scheme
-    if( SIMULATION_TYPE == 3 .and. it == 1 ) then
-     call it_read_forward_arrays()
-    endif
-
-    ! write the seismograms with time shift (GPU_MODE transfer included)
-    if (nrec_local > 0 .or. ( WRITE_SEISMOGRAMS_BY_MASTER .and. myrank == 0 ) ) then
-      call write_seismograms()
-    endif
-
-    ! resetting d/v/a/R/eps for the backward reconstruction with attenuation
-    if (ATTENUATION ) then
-      call it_store_attenuation_arrays()
-    endif
-
-    ! adjoint simulations: kernels
-    if( SIMULATION_TYPE == 3 ) then
-      call compute_kernels()
-    endif
-
-    ! outputs movie files
-    if( MOVIE_SIMULATION ) then
-      call write_movie_output()
-    endif
-
-    ! first step of noise tomography, i.e., save a surface movie at every time step
-    if ( NOISE_TOMOGRAPHY == 1) then
-      if( num_free_surface_faces > 0) then
-        call noise_save_surface_movie(displ,ibool, &
-                            noise_surface_movie,it, &
-                            NSPEC_AB,NGLOB_AB, &
-                            num_free_surface_faces,free_surface_ispec,free_surface_ijk, &
-                            Mesh_pointer,GPU_MODE)
-      endif
-    endif
-
-!
-!---- end of time iteration loop
-!
-  enddo   ! end of main time loop
-
-  call it_print_elapsed_time()
-
-  ! Transfer fields from GPU card to host for further analysis
-  if(GPU_MODE) call it_transfer_from_GPU()
-
-  end subroutine iterate_time
-
-
-!=====================================================================
-
-  subroutine it_check_stability()
-
-! computes the maximum of the norm of the displacement
-! in all the slices using an MPI reduction
-! and output timestamp file to check that simulation is running fine
-
-  use specfem_par
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  use specfem_par_acoustic
-  implicit none
-
-  double precision :: tCPU,t_remain,t_total
-  integer :: ihours,iminutes,iseconds,int_tCPU, &
-             ihours_remain,iminutes_remain,iseconds_remain,int_t_remain, &
-             ihours_total,iminutes_total,iseconds_total,int_t_total
-
-  ! maximum of the norm of the displacement
-  real(kind=CUSTOM_REAL) Usolidnorm,Usolidnorm_all ! elastic
-  real(kind=CUSTOM_REAL) Usolidnormp,Usolidnormp_all ! acoustic
-  real(kind=CUSTOM_REAL) Usolidnorms,Usolidnorms_all ! solid poroelastic
-  real(kind=CUSTOM_REAL) Usolidnormw,Usolidnormw_all ! fluid (w.r.t.s) poroelastic
-
-  ! norm of the backward displacement
-  real(kind=CUSTOM_REAL) b_Usolidnorm, b_Usolidnorm_all
-
-  ! initializes
-  Usolidnorm_all = 0.0_CUSTOM_REAL
-  Usolidnormp_all = 0.0_CUSTOM_REAL
-  Usolidnorms_all = 0.0_CUSTOM_REAL
-  Usolidnormw_all = 0.0_CUSTOM_REAL
-
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-!chris: Rewrite to get norm for each material when coupled simulations
-!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!!
-
-! compute maximum of norm of displacement in each slice
-  if( ELASTIC_SIMULATION ) then
-    if( GPU_MODE) then
-      ! way 2: just get maximum of field from GPU
-      call get_norm_elastic_from_device(Usolidnorm,Mesh_pointer,1)
-    else
-      Usolidnorm = maxval(sqrt(displ(1,:)**2 + displ(2,:)**2 + displ(3,:)**2))
-    endif
-
-    ! check stability of the code, exit if unstable
-    ! negative values can occur with some compilers when the unstable value is greater
-    ! than the greatest possible floating-point number of the machine
-    !if(Usolidnorm > STABILITY_THRESHOLD .or. Usolidnorm < 0.0_CUSTOM_REAL) &
-    !  call exit_MPI(myrank,'single forward simulation became unstable and blew up')
-
-    ! compute the maximum of the maxima for all the slices using an MPI reduction
-    call max_all_cr(Usolidnorm,Usolidnorm_all)
-  endif
-
-  if( ACOUSTIC_SIMULATION ) then
-    if(GPU_MODE) then
-      ! way 2: just get maximum of field from GPU
-      call get_norm_acoustic_from_device(Usolidnormp,Mesh_pointer,1)
-    else
-      Usolidnormp = maxval(abs(potential_dot_dot_acoustic(:)))
-    endif
-
-    ! compute the maximum of the maxima for all the slices using an MPI reduction
-    call max_all_cr(Usolidnormp,Usolidnormp_all)
-  endif
-
-  if( POROELASTIC_SIMULATION ) then
-    Usolidnorms = maxval(sqrt(displs_poroelastic(1,:)**2 + displs_poroelastic(2,:)**2 + &
-                             displs_poroelastic(3,:)**2))
-    Usolidnormw = maxval(sqrt(displw_poroelastic(1,:)**2 + displw_poroelastic(2,:)**2 + &
-                             displw_poroelastic(3,:)**2))
-
-    ! compute the maximum of the maxima for all the slices using an MPI reduction
-    call max_all_cr(Usolidnorms,Usolidnorms_all)
-    call max_all_cr(Usolidnormw,Usolidnormw_all)
-  endif
-
-
-  ! adjoint simulations
-  if( SIMULATION_TYPE == 3 ) then
-    if( ELASTIC_SIMULATION ) then
-      ! way 2
-      if(GPU_MODE) then
-        call get_norm_elastic_from_device(b_Usolidnorm,Mesh_pointer,3)
-      else
-        b_Usolidnorm = maxval(sqrt(b_displ(1,:)**2 + b_displ(2,:)**2 + b_displ(3,:)**2))
-      endif
-    endif
-    if( ACOUSTIC_SIMULATION ) then
-        ! way 2
-        if(GPU_MODE) then
-          call get_norm_acoustic_from_device(b_Usolidnorm,Mesh_pointer,3)
-        else
-          b_Usolidnorm = maxval(abs(b_potential_dot_dot_acoustic(:)))
-        endif
-    endif
-    if( POROELASTIC_SIMULATION ) then
-        b_Usolidnorm = maxval(sqrt(b_displs_poroelastic(1,:)**2 + b_displs_poroelastic(2,:)**2 + &
-                                 b_displs_poroelastic(3,:)**2))
-    endif
-    ! check stability of the code, exit if unstable
-    ! negative values can occur with some compilers when the unstable value is greater
-    ! than the greatest possible floating-point number of the machine
-    !if(b_Usolidnorm > STABILITY_THRESHOLD .or. b_Usolidnorm < 0.0_CUSTOM_REAL) &
-    !  call exit_MPI(myrank,'single backward simulation became unstable and blew up')
-
-    ! compute max of all slices
-    call max_all_cr(b_Usolidnorm,b_Usolidnorm_all)
-  endif
-
-  ! user output
-  if(myrank == 0) then
-
-    write(IMAIN,*) 'Time step # ',it
-    write(IMAIN,*) 'Time: ',sngl((it-1)*DT-t0),' seconds'
-
-    ! elapsed time since beginning of the simulation
-    tCPU = wtime() - time_start
-    int_tCPU = int(tCPU)
-    ihours = int_tCPU / 3600
-    iminutes = (int_tCPU - 3600*ihours) / 60
-    iseconds = int_tCPU - 3600*ihours - 60*iminutes
-    write(IMAIN,*) 'Elapsed time in seconds = ',tCPU
-    write(IMAIN,"(' Elapsed time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") ihours,iminutes,iseconds
-    write(IMAIN,*) 'Mean elapsed time per time step in seconds = ',sngl(tCPU/dble(it))
-    if( ELASTIC_SIMULATION ) then
-      write(IMAIN,*) 'Max norm displacement vector U in all slices (m) = ',Usolidnorm_all
-    endif
-    if( ACOUSTIC_SIMULATION ) then
-        write(IMAIN,*) 'Max norm pressure P in all slices (Pa) = ',Usolidnormp_all
-    endif
-    if( POROELASTIC_SIMULATION ) then
-        write(IMAIN,*) 'Max norm displacement vector Us in all slices (m) = ',Usolidnorms_all
-        write(IMAIN,*) 'Max norm displacement vector W in all slices (m) = ',Usolidnormw_all
-    endif
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) write(IMAIN,*) &
-           'Max norm U (backward) in all slices = ',b_Usolidnorm_all
-
-    ! compute estimated remaining simulation time
-    t_remain = (NSTEP - it) * (tCPU/dble(it))
-    int_t_remain = int(t_remain)
-    ihours_remain = int_t_remain / 3600
-    iminutes_remain = (int_t_remain - 3600*ihours_remain) / 60
-    iseconds_remain = int_t_remain - 3600*ihours_remain - 60*iminutes_remain
-    write(IMAIN,*) 'Time steps done = ',it,' out of ',NSTEP
-    write(IMAIN,*) 'Time steps remaining = ',NSTEP - it
-    write(IMAIN,*) 'Estimated remaining time in seconds = ',sngl(t_remain)
-    write(IMAIN,"(' Estimated remaining time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") &
-             ihours_remain,iminutes_remain,iseconds_remain
-
-    ! compute estimated total simulation time
-    t_total = t_remain + tCPU
-    int_t_total = int(t_total)
-    ihours_total = int_t_total / 3600
-    iminutes_total = (int_t_total - 3600*ihours_total) / 60
-    iseconds_total = int_t_total - 3600*ihours_total - 60*iminutes_total
-    write(IMAIN,*) 'Estimated total run time in seconds = ',sngl(t_total)
-    write(IMAIN,"(' Estimated total run time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") &
-             ihours_total,iminutes_total,iseconds_total
-    write(IMAIN,*) 'We have done ',sngl(100.d0*dble(it)/dble(NSTEP)),'% of that'
-
-    if(it < 100) then
-      write(IMAIN,*) '************************************************************'
-      write(IMAIN,*) '**** BEWARE: the above time estimates are not reliable'
-      write(IMAIN,*) '**** because fewer than 100 iterations have been performed'
-      write(IMAIN,*) '************************************************************'
-    endif
-    write(IMAIN,*)
-
-    ! flushes file buffer for main output file (IMAIN)
-    call flush_IMAIN()
-
-    ! write time stamp file to give information about progression of simulation
-    write(outputname,"('/timestamp',i6.6)") it
-    open(unit=IOUT,file=trim(OUTPUT_FILES)//outputname,status='unknown')
-    write(IOUT,*) 'Time step # ',it
-    write(IOUT,*) 'Time: ',sngl((it-1)*DT-t0),' seconds'
-    write(IOUT,*) 'Elapsed time in seconds = ',tCPU
-    write(IOUT,"(' Elapsed time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") ihours,iminutes,iseconds
-    write(IOUT,*) 'Mean elapsed time per time step in seconds = ',tCPU/dble(it)
-    if( ELASTIC_SIMULATION ) then
-      write(IOUT,*) 'Max norm displacement vector U in all slices (m) = ',Usolidnorm_all
-    endif
-    if( ACOUSTIC_SIMULATION ) then
-        write(IOUT,*) 'Max norm pressure P in all slices (Pa) = ',Usolidnormp_all
-    endif
-    if( POROELASTIC_SIMULATION ) then
-        write(IOUT,*) 'Max norm displacement vector Us in all slices (m) = ',Usolidnorms_all
-        write(IOUT,*) 'Max norm displacement vector W in all slices (m) = ',Usolidnormw_all
-    endif
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) write(IOUT,*) &
-           'Max norm U (backward) in all slices = ',b_Usolidnorm_all
-    ! estimation
-    write(IOUT,*) 'Time steps done = ',it,' out of ',NSTEP
-    write(IOUT,*) 'Time steps remaining = ',NSTEP - it
-    write(IOUT,*) 'Estimated remaining time in seconds = ',t_remain
-    write(IOUT,"(' Estimated remaining time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") &
-             ihours_remain,iminutes_remain,iseconds_remain
-    write(IOUT,*) 'Estimated total run time in seconds = ',t_total
-    write(IOUT,"(' Estimated total run time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") &
-             ihours_total,iminutes_total,iseconds_total
-    write(IOUT,*) 'We have done ',sngl(100.d0*dble(it)/dble(NSTEP)),'% of that'
-    close(IOUT)
-
-    ! check stability of the code, exit if unstable
-    ! negative values can occur with some compilers when the unstable value is greater
-    ! than the greatest possible floating-point number of the machine
-    if(Usolidnorm_all > STABILITY_THRESHOLD .or. Usolidnorm_all < 0.0_CUSTOM_REAL &
-     .or. Usolidnormp_all > STABILITY_THRESHOLD .or. Usolidnormp_all < 0.0_CUSTOM_REAL &
-     .or. Usolidnorms_all > STABILITY_THRESHOLD .or. Usolidnorms_all < 0.0_CUSTOM_REAL &
-     .or. Usolidnormw_all > STABILITY_THRESHOLD .or. Usolidnormw_all < 0.0_CUSTOM_REAL) &
-        call exit_MPI(myrank,'forward simulation became unstable and blew up')
-    ! adjoint simulations
-    if(SIMULATION_TYPE == 3 .and. (b_Usolidnorm_all > STABILITY_THRESHOLD &
-      .or. b_Usolidnorm_all < 0.0)) &
-        call exit_MPI(myrank,'backward simulation became unstable and blew up')
-
-  endif ! myrank
-
-  end subroutine it_check_stability
-
-
-!=====================================================================
-
-  subroutine it_update_displacement_scheme()
-
-! explicit Newmark time scheme with acoustic & elastic domains:
-! (see e.g. Hughes, 1987; Chaljub et al., 2003)
-!
-! chi(t+delta_t) = chi(t) + delta_t chi_dot(t) + 1/2 delta_t**2 chi_dot_dot(t)
-! chi_dot(t+delta_t) = chi_dot(t) + 1/2 delta_t chi_dot_dot(t) + 1/2 delta_t chi_dot_dot(t+delta_t)
-! chi_dot_dot(t+delta_t) = 1/M_acoustic( -K_acoustic chi(t+delta) + B_acoustic u(t+delta_t) + f(t+delta_t) )
-!
-! u(t+delta_t) = u(t) + delta_t  v(t) + 1/2  delta_t**2 a(t)
-! v(t+delta_t) = v(t) + 1/2 delta_t a(t) + 1/2 delta_t a(t+delta_t)
-! a(t+delta_t) = 1/M_elastic ( -K_elastic u(t+delta) + B_elastic chi_dot_dot(t+delta_t) + f( t+delta_t) )
-!
-! where
-!   chi, chi_dot, chi_dot_dot are acoustic (fluid) potentials ( dotted with respect to time)
-!   u, v, a are displacement,velocity & acceleration
-!   M is mass matrix, K stiffness matrix and B boundary term for acoustic/elastic domains
-!   f denotes a source term (acoustic/elastic)
-!
-! note that this stage calculates the predictor terms
-!
-!   for
-!   potential chi_dot(t+delta) requires + 1/2 delta_t chi_dot_dot(t+delta_t)
-!                                   at a later stage (corrector) once where chi_dot_dot(t+delta) is calculated
-!   and similar,
-!   velocity v(t+delta_t) requires  + 1/2 delta_t a(t+delta_t)
-!                                   at a later stage once where a(t+delta) is calculated
-! also:
-!   boundary term B_elastic requires chi_dot_dot(t+delta)
-!                                   thus chi_dot_dot has to be updated first before the elastic boundary term is considered
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-
-  implicit none
-
-! updates acoustic potentials
-  if( ACOUSTIC_SIMULATION ) then
-
-    if(.NOT. GPU_MODE) then
-      ! on CPU
-      potential_acoustic(:) = potential_acoustic(:) &
-                            + deltat * potential_dot_acoustic(:) &
-                            + deltatsqover2 * potential_dot_dot_acoustic(:)
-      potential_dot_acoustic(:) = potential_dot_acoustic(:) &
-                                + deltatover2 * potential_dot_dot_acoustic(:)
-      potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
-    else
-      ! on GPU
-      call it_update_displacement_ac_cuda(Mesh_pointer, NGLOB_AB, &
-                                          deltat, deltatsqover2, deltatover2, &
-                                          b_deltat, b_deltatsqover2, b_deltatover2)
-    endif
-
-  endif ! ACOUSTIC_SIMULATION
-
-! updates elastic displacement and velocity
-  if( ELASTIC_SIMULATION ) then
-
-    if(.NOT. GPU_MODE) then
-      ! on CPU
-      displ(:,:) = displ(:,:) + deltat*veloc(:,:) + deltatsqover2*accel(:,:)
-      veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
-      if( SIMULATION_TYPE /= 1 ) accel_adj_coupling(:,:) = accel(:,:)
-      accel(:,:) = 0._CUSTOM_REAL
-    else
-      ! on GPU
-      ! Includes SIM_TYPE 1 & 3 (for noise tomography)
-      call it_update_displacement_cuda(Mesh_pointer, size(displ), deltat, deltatsqover2,&
-                                       deltatover2, b_deltat, b_deltatsqover2, b_deltatover2)
-    endif
-  endif
-
-! updates poroelastic displacements and velocities
-  if( POROELASTIC_SIMULATION ) then
-    ! solid phase
-    displs_poroelastic(:,:) = displs_poroelastic(:,:) + deltat*velocs_poroelastic(:,:) + &
-                              deltatsqover2*accels_poroelastic(:,:)
-    velocs_poroelastic(:,:) = velocs_poroelastic(:,:) + deltatover2*accels_poroelastic(:,:)
-    accels_poroelastic(:,:) = 0._CUSTOM_REAL
-
-    ! fluid phase
-    displw_poroelastic(:,:) = displw_poroelastic(:,:) + deltat*velocw_poroelastic(:,:) + &
-                              deltatsqover2*accelw_poroelastic(:,:)
-    velocw_poroelastic(:,:) = velocw_poroelastic(:,:) + deltatover2*accelw_poroelastic(:,:)
-    accelw_poroelastic(:,:) = 0._CUSTOM_REAL
-  endif
-
-! adjoint simulations
-  if (SIMULATION_TYPE == 3 .and. .NOT. GPU_MODE) then
-    ! acoustic backward fields
-    if( ACOUSTIC_SIMULATION ) then
-      b_potential_acoustic(:) = b_potential_acoustic(:) &
-                              + b_deltat * b_potential_dot_acoustic(:) &
-                              + b_deltatsqover2 * b_potential_dot_dot_acoustic(:)
-      b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) &
-                                  + b_deltatover2 * b_potential_dot_dot_acoustic(:)
-      b_potential_dot_dot_acoustic(:) = 0._CUSTOM_REAL
-    endif
-
-    ! elastic backward fields
-    if( ELASTIC_SIMULATION ) then
-      b_displ(:,:) = b_displ(:,:) + b_deltat*b_veloc(:,:) + b_deltatsqover2*b_accel(:,:)
-      b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
-      b_accel(:,:) = 0._CUSTOM_REAL
-    endif
-    ! poroelastic backward fields
-    if( POROELASTIC_SIMULATION ) then
-    ! solid phase
-    b_displs_poroelastic(:,:) = b_displs_poroelastic(:,:) + b_deltat*b_velocs_poroelastic(:,:) + &
-                              b_deltatsqover2*b_accels_poroelastic(:,:)
-    b_velocs_poroelastic(:,:) = b_velocs_poroelastic(:,:) + b_deltatover2*b_accels_poroelastic(:,:)
-    b_accels_poroelastic(:,:) = 0._CUSTOM_REAL
-
-    ! fluid phase
-    b_displw_poroelastic(:,:) = b_displw_poroelastic(:,:) + b_deltat*b_velocw_poroelastic(:,:) + &
-                              b_deltatsqover2*b_accelw_poroelastic(:,:)
-    b_velocw_poroelastic(:,:) = b_velocw_poroelastic(:,:) + b_deltatover2*b_accelw_poroelastic(:,:)
-    b_accelw_poroelastic(:,:) = 0._CUSTOM_REAL
-    endif
-  endif
-
-! adjoint simulations: moho kernel
-  if (SAVE_MOHO_MESH .and. SIMULATION_TYPE == 3) then
-    ispec2D_moho_top = 0
-    ispec2D_moho_bot = 0
-  endif
-
-
-  end subroutine it_update_displacement_scheme
-
-!=====================================================================
-
-  subroutine it_read_forward_arrays()
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  implicit none
-
-  integer :: ier
-
-! restores last time snapshot saved for backward/reconstruction of wavefields
-! note: this is done here after the Newmark time scheme, otherwise the indexing for sources
-!          and adjoint sources will become more complicated
-!          that is, index it for adjoint sources will match index NSTEP - 1 for backward/reconstructed wavefields
-
-  ! reads in wavefields
-  open(unit=27,file=trim(prname)//'save_forward_arrays.bin',status='old',&
-        action='read',form='unformatted',iostat=ier)
-  if( ier /= 0 ) then
-    print*,'error: opening save_forward_arrays'
-    print*,'path: ',trim(prname)//'save_forward_arrays.bin'
-    call exit_mpi(myrank,'error open file save_forward_arrays.bin')
-  endif
-
-  if( ACOUSTIC_SIMULATION ) then
-    read(27) b_potential_acoustic
-    read(27) b_potential_dot_acoustic
-    read(27) b_potential_dot_dot_acoustic
-
-    ! transfers fields onto GPU
-    if(GPU_MODE) &
-      call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
-                          b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
-  endif
-
-  ! elastic wavefields
-  if( ELASTIC_SIMULATION ) then
-    read(27) b_displ
-    read(27) b_veloc
-    read(27) b_accel
-
-    ! puts elastic wavefield to GPU
-    if(GPU_MODE) &
-      call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel,Mesh_pointer)
-
-    ! memory variables if attenuation
-    if( ATTENUATION ) then
-      if(FULL_ATTENUATION_SOLID) read(27) b_R_trace  !ZN
-      read(27) b_R_xx
-      read(27) b_R_yy
-      read(27) b_R_xy
-      read(27) b_R_xz
-      read(27) b_R_yz
-      if(FULL_ATTENUATION_SOLID) read(27) b_epsilondev_trace !ZN
-      read(27) b_epsilondev_xx
-      read(27) b_epsilondev_yy
-      read(27) b_epsilondev_xy
-      read(27) b_epsilondev_xz
-      read(27) b_epsilondev_yz
-
-      ! puts elastic attenuation arrays to GPU
-      if(GPU_MODE) &
-          call transfer_b_fields_att_to_device(Mesh_pointer, &
-                    b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &
-!ZN                 b_R_trace,b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &  ! please change the above line with this
-                    b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
-!ZN                 b_epsilondev_trace,b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
-!ZN                 ! please change the above line with this
-                    size(b_epsilondev_xx))
-    endif
-
-  endif
-
-  ! poroelastic wavefields
-  if( POROELASTIC_SIMULATION ) then
-    read(27) b_displs_poroelastic
-    read(27) b_velocs_poroelastic
-    read(27) b_accels_poroelastic
-    read(27) b_displw_poroelastic
-    read(27) b_velocw_poroelastic
-    read(27) b_accelw_poroelastic
-  endif
-
-  close(27)
-
-  end subroutine it_read_forward_arrays
-
-!=====================================================================
-
-  subroutine it_store_attenuation_arrays()
-
-! resetting d/v/a/R/eps for the backward reconstruction with attenuation
-
-  use specfem_par
-  use specfem_par_elastic
-  use specfem_par_acoustic
-
-  implicit none
-
-  if( it > 1 .and. it < NSTEP) then
-    ! adjoint simulations
-
-! note backward/reconstructed wavefields:
-!       storing wavefield displ() at time step it, corresponds to time (it-1)*DT - t0 (see routine write_seismograms_to_file )
-!       reconstucted wavefield b_displ() at it corresponds to time (NSTEP-it-1)*DT - t0
-!       we read in the reconstructed wavefield at the end of the time iteration loop, i.e. after the Newmark scheme,
-!       thus, indexing is NSTEP-it (rather than something like NSTEP-(it-1) )
-    if (SIMULATION_TYPE == 3 .and. mod(NSTEP-it,NSTEP_Q_SAVE) == 0) then
-      ! reads files content
-      write(outputname,"('save_Q_arrays_',i6.6,'.bin')") NSTEP-it
-      open(unit=27,file=trim(prname_Q)//trim(outputname),status='old',&
-            action='read',form='unformatted')
-      if( ELASTIC_SIMULATION ) then
-        ! reads arrays from disk files
-        read(27) b_displ
-        read(27) b_veloc
-        read(27) b_accel
-
-        ! puts elastic fields onto GPU
-        if(GPU_MODE) then
-          ! wavefields
-          call transfer_b_fields_to_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
-        endif
-        if(FULL_ATTENUATION_SOLID) read(27) b_R_trace !ZN
-        read(27) b_R_xx
-        read(27) b_R_yy
-        read(27) b_R_xy
-        read(27) b_R_xz
-        read(27) b_R_yz
-        if(FULL_ATTENUATION_SOLID) read(27) b_epsilondev_trace !ZN
-        read(27) b_epsilondev_xx
-        read(27) b_epsilondev_yy
-        read(27) b_epsilondev_xy
-        read(27) b_epsilondev_xz
-        read(27) b_epsilondev_yz
-
-        ! puts elastic fields onto GPU
-        if(GPU_MODE) then
-          ! attenuation arrays
-          call transfer_b_fields_att_to_device(Mesh_pointer, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &
-!ZN               b_R_trace,b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz,size(b_R_xx), &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
-!ZN               b_epsilondev_trace,b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy,b_epsilondev_xz,b_epsilondev_yz, &
-                  size(b_epsilondev_xx))
-        endif
-      endif
-
-      if( ACOUSTIC_SIMULATION ) then
-        ! reads arrays from disk files
-        read(27) b_potential_acoustic
-        read(27) b_potential_dot_acoustic
-        read(27) b_potential_dot_dot_acoustic
-
-        ! puts acoustic fields onto GPU
-        if(GPU_MODE) &
-          call transfer_b_fields_ac_to_device(NGLOB_AB,b_potential_acoustic, &
-                              b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
-
-      endif
-      close(27)
-    else if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD .and. mod(it,NSTEP_Q_SAVE) == 0) then
-      ! stores files content
-      write(outputname,"('save_Q_arrays_',i6.6,'.bin')") it
-      open(unit=27,file=trim(prname_Q)//trim(outputname),status='unknown',&
-           action='write',form='unformatted')
-      if( ELASTIC_SIMULATION ) then
-        ! gets elastic fields from GPU onto CPU
-        if(GPU_MODE) then
-          call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
-        endif
-
-        ! writes to disk file
-        write(27) displ
-        write(27) veloc
-        write(27) accel
-
-        if(GPU_MODE) then
-          ! attenuation arrays
-          call transfer_fields_att_from_device(Mesh_pointer, &
-                     R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
-!ZN                  R_trace,R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
-                     epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
-!ZN                  epsilondev_trace,epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
-                     size(epsilondev_xx))
-        endif
-
-        if(FULL_ATTENUATION_SOLID) write(27) R_trace  !ZN
-        write(27) R_xx
-        write(27) R_yy
-        write(27) R_xy
-        write(27) R_xz
-        write(27) R_yz
-        if(FULL_ATTENUATION_SOLID) write(27) epsilondev_trace !ZN
-        write(27) epsilondev_xx
-        write(27) epsilondev_yy
-        write(27) epsilondev_xy
-        write(27) epsilondev_xz
-        write(27) epsilondev_yz
-      endif
-      if( ACOUSTIC_SIMULATION ) then
-       ! gets acoustic fields from GPU onto CPU
-        if(GPU_MODE) &
-          call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
-                              potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
-
-        ! writes to disk file
-        write(27) potential_acoustic
-        write(27) potential_dot_acoustic
-        write(27) potential_dot_dot_acoustic
-      endif
-      close(27)
-    endif ! SIMULATION_TYPE
-  endif ! it
-
-  end subroutine it_store_attenuation_arrays
-
-!=====================================================================
-
-  subroutine it_print_elapsed_time()
-
-    use specfem_par
-    use specfem_par_elastic
-    use specfem_par_acoustic
-    implicit none
-
-    ! local parameters
-    double precision :: tCPU
-    integer :: ihours,iminutes,iseconds,int_tCPU
-
-    if(myrank == 0) then
-       ! elapsed time since beginning of the simulation
-       tCPU = wtime() - time_start
-       int_tCPU = int(tCPU)
-       ihours = int_tCPU / 3600
-       iminutes = (int_tCPU - 3600*ihours) / 60
-       iseconds = int_tCPU - 3600*ihours - 60*iminutes
-       write(IMAIN,*) 'Time-Loop Complete. Timing info:'
-       write(IMAIN,*) 'Total elapsed time in seconds = ',tCPU
-       write(IMAIN,"(' Total elapsed time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") ihours,iminutes,iseconds
-    endif
-
-  end subroutine it_print_elapsed_time
-
-!=====================================================================
-
-  subroutine it_transfer_from_GPU()
-
-! transfers fields on GPU back onto CPU
-
-  use specfem_par
-  use specfem_par_elastic
-  use specfem_par_acoustic
-
-  implicit none
-
-  ! to store forward wave fields
-  if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
-
-    ! acoustic potentials
-    if( ACOUSTIC_SIMULATION ) &
-      call transfer_fields_ac_from_device(NGLOB_AB,potential_acoustic, &
-                            potential_dot_acoustic, potential_dot_dot_acoustic, Mesh_pointer)
-
-    ! elastic wavefield
-    if( ELASTIC_SIMULATION ) then
-      call transfer_fields_el_from_device(NDIM*NGLOB_AB,displ,veloc, accel, Mesh_pointer)
-
-      if (ATTENUATION) &
-        call transfer_fields_att_from_device(Mesh_pointer, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
-!ZN                 R_trace,R_xx,R_yy,R_xy,R_xz,R_yz,size(R_xx), &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
-!ZN                 epsilondev_trace,epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
-                    size(epsilondev_xx))
-
-    endif
-  else if (SIMULATION_TYPE == 3) then
-
-    ! to store kernels
-    ! acoustic domains
-    if( ACOUSTIC_SIMULATION ) then
-      ! only in case needed...
-      !call transfer_b_fields_ac_from_device(NGLOB_AB,b_potential_acoustic, &
-      !                      b_potential_dot_acoustic, b_potential_dot_dot_acoustic, Mesh_pointer)
-
-      ! acoustic kernels
-      call transfer_kernels_ac_to_host(Mesh_pointer,rho_ac_kl,kappa_ac_kl,NSPEC_AB)
-    endif
-
-    ! elastic domains
-    if( ELASTIC_SIMULATION ) then
-      ! only in case needed...
-      !call transfer_b_fields_from_device(NDIM*NGLOB_AB,b_displ,b_veloc,b_accel, Mesh_pointer)
-
-      ! elastic kernels
-      call transfer_kernels_el_to_host(Mesh_pointer,rho_kl,mu_kl,kappa_kl,NSPEC_AB)
-    endif
-
-    ! specific noise strength kernel
-    if( NOISE_TOMOGRAPHY == 3 ) then
-      call transfer_kernels_noise_to_host(Mesh_pointer,Sigma_kl,NSPEC_AB)
-    endif
-
-    ! approximative hessian for preconditioning kernels
-    if ( APPROXIMATE_HESS_KL ) then
-      if( ELASTIC_SIMULATION ) call transfer_kernels_hess_el_tohost(Mesh_pointer,hess_kl,NSPEC_AB)
-      if( ACOUSTIC_SIMULATION ) call transfer_kernels_hess_ac_tohost(Mesh_pointer,hess_ac_kl,NSPEC_AB)
-    endif
-
-  endif
-
-  ! frees allocated memory on GPU
-  call prepare_cleanup_device(Mesh_pointer, &
-                              SAVE_FORWARD, &
-                              ACOUSTIC_SIMULATION,ELASTIC_SIMULATION, &
-                              ABSORBING_CONDITIONS,NOISE_TOMOGRAPHY,COMPUTE_AND_STORE_STRAIN, &
-                              ATTENUATION,ANISOTROPY,APPROXIMATE_OCEAN_LOAD, &
-                              APPROXIMATE_HESS_KL)
-
-  end subroutine it_transfer_from_GPU



More information about the CIG-COMMITS mailing list