[cig-commits] r21238 - seismo/3D/SPECFEM3D/trunk/src/specfem3D

dkomati1 at geodynamics.org dkomati1 at geodynamics.org
Wed Jan 16 09:48:05 PST 2013


Author: dkomati1
Date: 2013-01-16 09:48:05 -0800 (Wed, 16 Jan 2013)
New Revision: 21238

Removed:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev2.f90
Modified:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90
Log:
got rid of compute_forces_elastic_Dev_6p .. compute_forces_elastic_Dev_10p, which are never used and would require too much work to maintain (now that we are adding C-PML, also adding loop vectorization and so on)


Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in	2013-01-16 16:59:14 UTC (rev 21237)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in	2013-01-16 17:48:05 UTC (rev 21238)
@@ -193,7 +193,6 @@
 	$O/compute_forces_acoustic_pot.o \
 	$O/compute_forces_elastic.o \
 	$O/compute_forces_elastic_Dev.o \
-	$O/compute_forces_elastic_Dev2.o \
 	$O/compute_forces_elastic_noDev.o \
 	$O/compute_forces_fluid.o \
 	$O/compute_forces_poroelastic.o \

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90	2013-01-16 16:59:14 UTC (rev 21237)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90	2013-01-16 17:48:05 UTC (rev 21238)
@@ -487,136 +487,6 @@
              phase_ispec_inner_elastic )
 #endif
 
-  case (6)
-    call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                    kappastore,mustore,jacobian,ibool, &
-                    ATTENUATION,deltat, &
-                    one_minus_sum_beta,factor_common, &
-                    alphaval,betaval,gammaval, &
-                    NSPEC_ATTENUATION_AB, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                    c55store,c56store,c66store, &
-                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                    is_moho_top,is_moho_bot, &
-                    dsdx_top,dsdx_bot, &
-                    ispec2D_moho_top,ispec2D_moho_bot, &
-                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                    phase_ispec_inner_elastic )
-
-  case (7)
-    call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                    kappastore,mustore,jacobian,ibool, &
-                    ATTENUATION,deltat, &
-                    one_minus_sum_beta,factor_common, &
-                    alphaval,betaval,gammaval, &
-                    NSPEC_ATTENUATION_AB, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                    c55store,c56store,c66store, &
-                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                    is_moho_top,is_moho_bot, &
-                    dsdx_top,dsdx_bot, &
-                    ispec2D_moho_top,ispec2D_moho_bot, &
-                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                    phase_ispec_inner_elastic )
-
-  case (8)
-    call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                    kappastore,mustore,jacobian,ibool, &
-                    ATTENUATION,deltat, &
-                    one_minus_sum_beta,factor_common, &
-                    alphaval,betaval,gammaval, &
-                    NSPEC_ATTENUATION_AB, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                    c55store,c56store,c66store, &
-                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                    is_moho_top,is_moho_bot, &
-                    dsdx_top,dsdx_bot, &
-                    ispec2D_moho_top,ispec2D_moho_bot, &
-                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                    phase_ispec_inner_elastic )
-
-  case (9)
-    call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                    kappastore,mustore,jacobian,ibool, &
-                    ATTENUATION,deltat, &
-                    one_minus_sum_beta,factor_common, &
-                    alphaval,betaval,gammaval, &
-                    NSPEC_ATTENUATION_AB, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                    c55store,c56store,c66store, &
-                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                    is_moho_top,is_moho_bot, &
-                    dsdx_top,dsdx_bot, &
-                    ispec2D_moho_top,ispec2D_moho_bot, &
-                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                    phase_ispec_inner_elastic )
-
-  case (10)
-    call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
-                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                    hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                    kappastore,mustore,jacobian,ibool, &
-                    ATTENUATION,deltat, &
-                    one_minus_sum_beta,factor_common, &
-                    alphaval,betaval,gammaval, &
-                    NSPEC_ATTENUATION_AB, &
-                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                    ANISOTROPY,NSPEC_ANISO, &
-                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                    c55store,c56store,c66store, &
-                    SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                    is_moho_top,is_moho_bot, &
-                    dsdx_top,dsdx_bot, &
-                    ispec2D_moho_top,ispec2D_moho_bot, &
-                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                    phase_ispec_inner_elastic )
-
   case default
 
     stop 'error no Deville routine available for chosen NGLLX'
@@ -672,141 +542,6 @@
                   num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
                   phase_ispec_inner_elastic )
 
-  case (6)
-    call compute_forces_elastic_Dev_6p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_veloc,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION,deltat, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case (7)
-    call compute_forces_elastic_Dev_7p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_veloc,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION,deltat, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case (8)
-    call compute_forces_elastic_Dev_8p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_veloc,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION,deltat, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case (9)
-    call compute_forces_elastic_Dev_9p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_veloc,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION,deltat, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case (10)
-    call compute_forces_elastic_Dev_10p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_veloc,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION,deltat, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
   case default
 
     stop 'error no Deville routine available for chosen NGLLX'

Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev2.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev2.f90	2013-01-16 16:59:14 UTC (rev 21237)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev2.f90	2013-01-16 17:48:05 UTC (rev 21238)
@@ -1,4450 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 1
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-!                             July 2012
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-
-! higher-order Deville routines (NGLL == 6 to NGLL == 10 )
-!
-! note: put these routines into this extra file to avoid reaching internal threshold
-!          for vectorizations when compiling
-
-subroutine compute_forces_elastic_Dev_6p( iphase ,NSPEC_AB,NGLOB_AB, &
-                                    displ,veloc,accel, &
-                                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                                    hprime_xx,hprime_xxT, &
-                                    hprimewgll_xx,hprimewgll_xxT, &
-                                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                                    kappastore,mustore,jacobian,ibool, &
-                                    ATTENUATION,deltat, &
-                                    one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
-                                    NSPEC_ATTENUATION_AB, &
-                                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                                    ANISOTROPY,NSPEC_ANISO, &
-                                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                                    c55store,c56store,c66store, &
-                                    SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
-                                    is_moho_top,is_moho_bot, &
-                                    dsdx_top,dsdx_bot, &
-                                    ispec2D_moho_top,ispec2D_moho_bot, &
-                                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                                    phase_ispec_inner_elastic)
-
-
-! computes elastic tensor term
-
-  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
-                      N_SLS,SAVE_MOHO_MESH, &
-                      ONE_THIRD,FOUR_THIRDS,m1,m2
-  use fault_solver_dynamic, only : Kelvin_Voigt_eta
-
-  implicit none
-
-  integer :: NSPEC_AB,NGLOB_AB
-
-! displacement and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,veloc,accel
-
-! time step
-  real(kind=CUSTOM_REAL) :: deltat
-
-! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        kappastore,mustore,jacobian
-
-! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,6) :: hprime_xx,hprimewgll_xxT
-  real(kind=CUSTOM_REAL), dimension(6,NGLLX) :: hprime_xxT,hprimewgll_xx
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! memory variables and standard linear solids for attenuation
-  logical :: ATTENUATION
-  logical :: COMPUTE_AND_STORE_STRAIN
-  integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
-  integer :: NSPEC_ATTENUATION_AB
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
-  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
-      R_xx,R_yy,R_xy,R_xz,R_yz
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
-       epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
-  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
-
-! anisotropy
-  logical :: ANISOTROPY
-  integer :: NSPEC_ANISO
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
-            c11store,c12store,c13store,c14store,c15store,c16store, &
-            c22store,c23store,c24store,c25store,c26store,c33store, &
-            c34store,c35store,c36store,c44store,c45store,c46store, &
-            c55store,c56store,c66store
-
-  integer :: iphase
-  integer :: num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic
-  integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
-
-! adjoint simulations
-  integer :: SIMULATION_TYPE
-  integer :: NSPEC_BOUN,NSPEC2D_MOHO
-
-  ! moho kernel
-  real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
-    dsdx_top,dsdx_bot
-  logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
-  integer :: ispec2D_moho_top, ispec2D_moho_bot
-
-! local parameters
-  real(kind=CUSTOM_REAL), dimension(6,6,6) :: dummyx_loc,dummyy_loc,dummyz_loc, &
-    newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(6,6,6) :: &
-    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-
-  ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(6,36) :: B1_m1_m2_6points,B2_m1_m2_6points,B3_m1_m2_6points
-  real(kind=CUSTOM_REAL), dimension(6,36) :: C1_m1_m2_6points,C2_m1_m2_6points,C3_m1_m2_6points
-  real(kind=CUSTOM_REAL), dimension(6,36) :: E1_m1_m2_6points,E2_m1_m2_6points,E3_m1_m2_6points
-
-  equivalence(dummyx_loc,B1_m1_m2_6points)
-  equivalence(dummyy_loc,B2_m1_m2_6points)
-  equivalence(dummyz_loc,B3_m1_m2_6points)
-  equivalence(tempx1,C1_m1_m2_6points)
-  equivalence(tempy1,C2_m1_m2_6points)
-  equivalence(tempz1,C3_m1_m2_6points)
-  equivalence(newtempx1,E1_m1_m2_6points)
-  equivalence(newtempy1,E2_m1_m2_6points)
-  equivalence(newtempz1,E3_m1_m2_6points)
-
-  real(kind=CUSTOM_REAL), dimension(6,6,6) :: &
-    tempx1_att,tempx2_att,tempx3_att,tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
-  real(kind=CUSTOM_REAL), dimension(6,6,6) :: dummyx_loc_att,dummyy_loc_att,dummyz_loc_att
-  real(kind=CUSTOM_REAL), dimension(6,36) :: B1_m1_m2_6points_att,B2_m1_m2_6points_att,B3_m1_m2_6points_att
-  real(kind=CUSTOM_REAL), dimension(6,36) :: C1_m1_m2_6points_att,C2_m1_m2_6points_att,C3_m1_m2_6points_att
-
-  equivalence(dummyx_loc_att,B1_m1_m2_6points_att)
-  equivalence(dummyy_loc_att,B2_m1_m2_6points_att)
-  equivalence(dummyz_loc_att,B3_m1_m2_6points_att)
-  equivalence(tempx1_att,C1_m1_m2_6points_att)
-  equivalence(tempy1_att,C2_m1_m2_6points_att)
-  equivalence(tempz1_att,C3_m1_m2_6points_att)
-
-  real(kind=CUSTOM_REAL), dimension(36,6) :: &
-    A1_mxm_m2_m1_6points,A2_mxm_m2_m1_6points,A3_mxm_m2_m1_6points
-  real(kind=CUSTOM_REAL), dimension(36,6) :: &
-    C1_mxm_m2_m1_6points,C2_mxm_m2_m1_6points,C3_mxm_m2_m1_6points
-  real(kind=CUSTOM_REAL), dimension(36,6) :: &
-    E1_mxm_m2_m1_6points,E2_mxm_m2_m1_6points,E3_mxm_m2_m1_6points
-
-  equivalence(dummyx_loc,A1_mxm_m2_m1_6points)
-  equivalence(dummyy_loc,A2_mxm_m2_m1_6points)
-  equivalence(dummyz_loc,A3_mxm_m2_m1_6points)
-  equivalence(tempx3,C1_mxm_m2_m1_6points)
-  equivalence(tempy3,C2_mxm_m2_m1_6points)
-  equivalence(tempz3,C3_mxm_m2_m1_6points)
-  equivalence(newtempx3,E1_mxm_m2_m1_6points)
-  equivalence(newtempy3,E2_mxm_m2_m1_6points)
-  equivalence(newtempz3,E3_mxm_m2_m1_6points)
-
-  real(kind=CUSTOM_REAL), dimension(36,6) :: &
-    A1_mxm_m2_m1_6points_att,A2_mxm_m2_m1_6points_att,A3_mxm_m2_m1_6points_att
-  real(kind=CUSTOM_REAL), dimension(36,6) :: &
-    C1_mxm_m2_m1_6points_att,C2_mxm_m2_m1_6points_att,C3_mxm_m2_m1_6points_att
-
-  equivalence(dummyx_loc_att,A1_mxm_m2_m1_6points_att)
-  equivalence(dummyy_loc_att,A2_mxm_m2_m1_6points_att)
-  equivalence(dummyz_loc_att,A3_mxm_m2_m1_6points_att)
-  equivalence(tempx3_att,C1_mxm_m2_m1_6points_att)
-  equivalence(tempy3_att,C2_mxm_m2_m1_6points_att)
-  equivalence(tempz3_att,C3_mxm_m2_m1_6points_att)
-
-  ! local attenuation parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_xx_loc, &
-       epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
-  real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
-  real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
-  real(kind=CUSTOM_REAL) Sn,Snp1
-  real(kind=CUSTOM_REAL) templ
-
-  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
-
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-
-  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
-
-  real(kind=CUSTOM_REAL) fac1,fac2,fac3
-
-  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
-  real(kind=CUSTOM_REAL) kappal
-
-  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
-  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
-
-  ! local anisotropy parameters
-  real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
-                        c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
-
-  integer i_SLS,imodulo_N_SLS
-  integer ispec,iglob,ispec_p,num_elements
-  integer i,j,k
-
-  real(kind=CUSTOM_REAL) :: eta
-
-  imodulo_N_SLS = mod(N_SLS,3)
-
-  ! choses inner/outer elements
-  if( iphase == 1 ) then
-    num_elements = nspec_outer_elastic
-  else
-    num_elements = nspec_inner_elastic
-  endif
-
-  do ispec_p = 1,num_elements
-
-        ! returns element id from stored element list
-        ispec = phase_ispec_inner_elastic(ispec_p,iphase)
-
-        ! adjoint simulations: moho kernel
-        if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
-          if (is_moho_top(ispec)) then
-            ispec2D_moho_top = ispec2D_moho_top + 1
-          else if (is_moho_bot(ispec)) then
-            ispec2D_moho_bot = ispec2D_moho_bot + 1
-          endif
-        endif ! adjoint
-
-       ! Kelvin Voigt damping: artificial viscosity around dynamic faults
-
-        ! stores displacment values in local array
-        if (allocated(Kelvin_Voigt_eta)) then
-          eta = Kelvin_Voigt_eta(ispec)   
-          do k=1,NGLLZ
-            do j=1,NGLLY
-              do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc(i,j,k) = displ(1,iglob) + eta*veloc(1,iglob)
-                dummyy_loc(i,j,k) = displ(2,iglob) + eta*veloc(2,iglob)
-                dummyz_loc(i,j,k) = displ(3,iglob) + eta*veloc(3,iglob)
-              enddo
-            enddo
-          enddo
-
-        else
-          do k=1,NGLLZ
-            do j=1,NGLLY
-              do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc(i,j,k) = displ(1,iglob)
-                dummyy_loc(i,j,k) = displ(2,iglob)
-                dummyz_loc(i,j,k) = displ(3,iglob)
-              enddo
-            enddo
-          enddo
-        endif
-
-         
-        ! use first order Taylor expansion of displacement for local storage of stresses 
-        ! at this current time step, to fix attenuation in a consistent way
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           do k=1,NGLLZ
-              do j=1,NGLLY
-                 do i=1,NGLLX
-                    iglob = ibool(i,j,k,ispec)
-                    dummyx_loc_att(i,j,k) = deltat*veloc(1,iglob)
-                    dummyy_loc_att(i,j,k) = deltat*veloc(2,iglob)
-                    dummyz_loc_att(i,j,k) = deltat*veloc(3,iglob)
-                 enddo
-              enddo
-           enddo
-        endif
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_6points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
-        do j=1,m2
-          do i=1,m1
-            C1_m1_m2_6points(i,j) = hprime_xx(i,1)*B1_m1_m2_6points(1,j) + &
-                                  hprime_xx(i,2)*B1_m1_m2_6points(2,j) + &
-                                  hprime_xx(i,3)*B1_m1_m2_6points(3,j) + &
-                                  hprime_xx(i,4)*B1_m1_m2_6points(4,j) + &
-                                  hprime_xx(i,5)*B1_m1_m2_6points(5,j) + &
-                                  hprime_xx(i,6)*B1_m1_m2_6points(6,j)
-            C2_m1_m2_6points(i,j) = hprime_xx(i,1)*B2_m1_m2_6points(1,j) + &
-                                  hprime_xx(i,2)*B2_m1_m2_6points(2,j) + &
-                                  hprime_xx(i,3)*B2_m1_m2_6points(3,j) + &
-                                  hprime_xx(i,4)*B2_m1_m2_6points(4,j) + &
-                                  hprime_xx(i,5)*B2_m1_m2_6points(5,j) + &
-                                  hprime_xx(i,6)*B2_m1_m2_6points(6,j)
-            C3_m1_m2_6points(i,j) = hprime_xx(i,1)*B3_m1_m2_6points(1,j) + &
-                                  hprime_xx(i,2)*B3_m1_m2_6points(2,j) + &
-                                  hprime_xx(i,3)*B3_m1_m2_6points(3,j) + &
-                                  hprime_xx(i,4)*B3_m1_m2_6points(4,j) + &
-                                  hprime_xx(i,5)*B3_m1_m2_6points(5,j) + &
-                                  hprime_xx(i,6)*B3_m1_m2_6points(6,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m2
-              do i=1,m1
-                 C1_m1_m2_6points_att(i,j) = C1_m1_m2_6points(i,j) + & 
-                      hprime_xx(i,1)*B1_m1_m2_6points_att(1,j) + &
-                      hprime_xx(i,2)*B1_m1_m2_6points_att(2,j) + &
-                      hprime_xx(i,3)*B1_m1_m2_6points_att(3,j) + &
-                      hprime_xx(i,4)*B1_m1_m2_6points_att(4,j) + &
-                      hprime_xx(i,5)*B1_m1_m2_6points_att(5,j) + &
-                      hprime_xx(i,6)*B1_m1_m2_6points_att(6,j)
-
-                 C2_m1_m2_6points_att(i,j) = C2_m1_m2_6points(i,j) + &
-                      hprime_xx(i,1)*B2_m1_m2_6points_att(1,j) + &
-                      hprime_xx(i,2)*B2_m1_m2_6points_att(2,j) + &
-                      hprime_xx(i,3)*B2_m1_m2_6points_att(3,j) + &
-                      hprime_xx(i,4)*B2_m1_m2_6points_att(4,j) + &
-                      hprime_xx(i,5)*B2_m1_m2_6points_att(5,j) + &
-                      hprime_xx(i,6)*B2_m1_m2_6points_att(6,j)
-
-                 C3_m1_m2_6points_att(i,j) = C3_m1_m2_6points(i,j) + &
-                      hprime_xx(i,1)*B3_m1_m2_6points_att(1,j) + &
-                      hprime_xx(i,2)*B3_m1_m2_6points_att(2,j) + &
-                      hprime_xx(i,3)*B3_m1_m2_6points_att(3,j) + &
-                      hprime_xx(i,4)*B3_m1_m2_6points_att(4,j) + &
-                      hprime_xx(i,5)*B3_m1_m2_6points_att(5,j) + &
-                      hprime_xx(i,6)*B3_m1_m2_6points_att(6,j)
-              enddo
-           enddo
-        endif
-
-        !   call mxm_m1_m1_6points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
-        !          hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
-        do j=1,m1
-          do i=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyx_loc(i,6,k)*hprime_xxT(6,j)
-              tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyy_loc(i,6,k)*hprime_xxT(6,j)
-              tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyz_loc(i,6,k)*hprime_xxT(6,j)
-            enddo
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m1
-                 ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-                 do k = 1,NGLLX
-                    tempx2_att(i,j,k) = tempx2(i,j,k) + &
-                         dummyx_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyx_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyx_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyx_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyx_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyx_loc_att(i,6,k)*hprime_xxT(6,j)
-
-                    tempy2_att(i,j,k) = tempy2(i,j,k) + &
-                         dummyy_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyy_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyy_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyy_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyy_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyy_loc_att(i,6,k)*hprime_xxT(6,j)
-
-                    tempz2_att(i,j,k) = tempz2(i,j,k) + &
-                         dummyz_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyz_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyz_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyz_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyz_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyz_loc_att(i,6,k)*hprime_xxT(6,j)
-                 enddo
-              enddo
-           enddo
-        endif
-
-        ! call mxm_m2_m1_6points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
-        do j=1,m1
-          do i=1,m2
-            C1_mxm_m2_m1_6points(i,j) = A1_mxm_m2_m1_6points(i,1)*hprime_xxT(1,j) + &
-                                      A1_mxm_m2_m1_6points(i,2)*hprime_xxT(2,j) + &
-                                      A1_mxm_m2_m1_6points(i,3)*hprime_xxT(3,j) + &
-                                      A1_mxm_m2_m1_6points(i,4)*hprime_xxT(4,j) + &
-                                      A1_mxm_m2_m1_6points(i,5)*hprime_xxT(5,j) + &
-                                      A1_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
-            C2_mxm_m2_m1_6points(i,j) = A2_mxm_m2_m1_6points(i,1)*hprime_xxT(1,j) + &
-                                      A2_mxm_m2_m1_6points(i,2)*hprime_xxT(2,j) + &
-                                      A2_mxm_m2_m1_6points(i,3)*hprime_xxT(3,j) + &
-                                      A2_mxm_m2_m1_6points(i,4)*hprime_xxT(4,j) + &
-                                      A2_mxm_m2_m1_6points(i,5)*hprime_xxT(5,j) + &
-                                      A2_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
-            C3_mxm_m2_m1_6points(i,j) = A3_mxm_m2_m1_6points(i,1)*hprime_xxT(1,j) + &
-                                      A3_mxm_m2_m1_6points(i,2)*hprime_xxT(2,j) + &
-                                      A3_mxm_m2_m1_6points(i,3)*hprime_xxT(3,j) + &
-                                      A3_mxm_m2_m1_6points(i,4)*hprime_xxT(4,j) + &
-                                      A3_mxm_m2_m1_6points(i,5)*hprime_xxT(5,j) + &
-                                      A3_mxm_m2_m1_6points(i,6)*hprime_xxT(6,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m2
-                 C1_mxm_m2_m1_6points_att(i,j) = C1_mxm_m2_m1_6points(i,j) + &
-                      A1_mxm_m2_m1_6points_att(i,1)*hprime_xxT(1,j) + &
-                      A1_mxm_m2_m1_6points_att(i,2)*hprime_xxT(2,j) + &
-                      A1_mxm_m2_m1_6points_att(i,3)*hprime_xxT(3,j) + &
-                      A1_mxm_m2_m1_6points_att(i,4)*hprime_xxT(4,j) + &
-                      A1_mxm_m2_m1_6points_att(i,5)*hprime_xxT(5,j) + &
-                      A1_mxm_m2_m1_6points_att(i,6)*hprime_xxT(6,j)
-
-                 C2_mxm_m2_m1_6points_att(i,j) = C2_mxm_m2_m1_6points(i,j) + &
-                      A2_mxm_m2_m1_6points_att(i,1)*hprime_xxT(1,j) + &
-                      A2_mxm_m2_m1_6points_att(i,2)*hprime_xxT(2,j) + &
-                      A2_mxm_m2_m1_6points_att(i,3)*hprime_xxT(3,j) + &
-                      A2_mxm_m2_m1_6points_att(i,4)*hprime_xxT(4,j) + &
-                      A2_mxm_m2_m1_6points_att(i,5)*hprime_xxT(5,j) + &
-                      A2_mxm_m2_m1_6points_att(i,6)*hprime_xxT(6,j)
-
-                 C3_mxm_m2_m1_6points_att(i,j) = C3_mxm_m2_m1_6points(i,j) + &
-                      A3_mxm_m2_m1_6points_att(i,1)*hprime_xxT(1,j) + &
-                      A3_mxm_m2_m1_6points_att(i,2)*hprime_xxT(2,j) + &
-                      A3_mxm_m2_m1_6points_att(i,3)*hprime_xxT(3,j) + &
-                      A3_mxm_m2_m1_6points_att(i,4)*hprime_xxT(4,j) + &
-                      A3_mxm_m2_m1_6points_att(i,5)*hprime_xxT(5,j) + &
-                      A3_mxm_m2_m1_6points_att(i,6)*hprime_xxT(6,j)
-              enddo
-           enddo
-        endif
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-              ! get derivatives of ux, uy and uz with respect to x, y and z
-              xixl = xix(i,j,k,ispec)
-              xiyl = xiy(i,j,k,ispec)
-              xizl = xiz(i,j,k,ispec)
-              etaxl = etax(i,j,k,ispec)
-              etayl = etay(i,j,k,ispec)
-              etazl = etaz(i,j,k,ispec)
-              gammaxl = gammax(i,j,k,ispec)
-              gammayl = gammay(i,j,k,ispec)
-              gammazl = gammaz(i,j,k,ispec)
-              jacobianl = jacobian(i,j,k,ispec)
-
-              duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
-              duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
-              duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
-              duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
-              duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
-              duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
-              duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
-              duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
-              duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
-              ! save strain on the Moho boundary
-              if (SAVE_MOHO_MESH ) then
-                if (is_moho_top(ispec)) then
-                  dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
-                  dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
-                  dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
-                  dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
-                  dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
-                  dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
-                  dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
-                  dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
-                  dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
-                else if (is_moho_bot(ispec)) then
-                  dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
-                  dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
-                  dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
-                  dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
-                  dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
-                  dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
-                  dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
-                  dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
-                  dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
-                endif
-              endif
-
-              ! precompute some sums to save CPU time
-              duxdxl_plus_duydyl = duxdxl + duydyl
-              duxdxl_plus_duzdzl = duxdxl + duzdzl
-              duydyl_plus_duzdzl = duydyl + duzdzl
-              duxdyl_plus_duydxl = duxdyl + duydxl
-              duzdxl_plus_duxdzl = duzdxl + duxdzl
-              duzdyl_plus_duydzl = duzdyl + duydzl
-
-              if ( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
-                 ! temporary variables used for fixing attenuation in a consistent way
-                 duxdxl_att = xixl*tempx1_att(i,j,k) + etaxl*tempx2_att(i,j,k) + gammaxl*tempx3_att(i,j,k)
-                 duxdyl_att = xiyl*tempx1_att(i,j,k) + etayl*tempx2_att(i,j,k) + gammayl*tempx3_att(i,j,k)
-                 duxdzl_att = xizl*tempx1_att(i,j,k) + etazl*tempx2_att(i,j,k) + gammazl*tempx3_att(i,j,k)
-
-                 duydxl_att = xixl*tempy1_att(i,j,k) + etaxl*tempy2_att(i,j,k) + gammaxl*tempy3_att(i,j,k)
-                 duydyl_att = xiyl*tempy1_att(i,j,k) + etayl*tempy2_att(i,j,k) + gammayl*tempy3_att(i,j,k)
-                 duydzl_att = xizl*tempy1_att(i,j,k) + etazl*tempy2_att(i,j,k) + gammazl*tempy3_att(i,j,k)
-
-                 duzdxl_att = xixl*tempz1_att(i,j,k) + etaxl*tempz2_att(i,j,k) + gammaxl*tempz3_att(i,j,k)
-                 duzdyl_att = xiyl*tempz1_att(i,j,k) + etayl*tempz2_att(i,j,k) + gammayl*tempz3_att(i,j,k)
-                 duzdzl_att = xizl*tempz1_att(i,j,k) + etazl*tempz2_att(i,j,k) + gammazl*tempz3_att(i,j,k)
-
-                 ! precompute some sums to save CPU time
-                 duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
-                 duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
-                 duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
-
-                 ! compute deviatoric strain
-                 templ = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
-                 if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                 epsilondev_xx_loc(i,j,k) = duxdxl_att - templ
-                 epsilondev_yy_loc(i,j,k) = duydyl_att - templ
-                 epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
-                 epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
-                 epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
-              else
-                 ! computes deviatoric strain attenuation and/or for kernel calculations
-                 if (COMPUTE_AND_STORE_STRAIN) then
-                    templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-                    if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                    epsilondev_xx_loc(i,j,k) = duxdxl - templ
-                    epsilondev_yy_loc(i,j,k) = duydyl - templ
-                    epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
-                    epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
-                    epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
-                 endif
-              endif
-
-              kappal = kappastore(i,j,k,ispec)
-              mul = mustore(i,j,k,ispec)
-
-              ! attenuation
-              if(ATTENUATION) then
-                ! use unrelaxed parameters if attenuation
-                mul  = mul * one_minus_sum_beta(i,j,k,ispec)
-              endif
-
-  ! full anisotropic case, stress calculations
-              if(ANISOTROPY) then
-                c11 = c11store(i,j,k,ispec)
-                c12 = c12store(i,j,k,ispec)
-                c13 = c13store(i,j,k,ispec)
-                c14 = c14store(i,j,k,ispec)
-                c15 = c15store(i,j,k,ispec)
-                c16 = c16store(i,j,k,ispec)
-                c22 = c22store(i,j,k,ispec)
-                c23 = c23store(i,j,k,ispec)
-                c24 = c24store(i,j,k,ispec)
-                c25 = c25store(i,j,k,ispec)
-                c26 = c26store(i,j,k,ispec)
-                c33 = c33store(i,j,k,ispec)
-                c34 = c34store(i,j,k,ispec)
-                c35 = c35store(i,j,k,ispec)
-                c36 = c36store(i,j,k,ispec)
-                c44 = c44store(i,j,k,ispec)
-                c45 = c45store(i,j,k,ispec)
-                c46 = c46store(i,j,k,ispec)
-                c55 = c55store(i,j,k,ispec)
-                c56 = c56store(i,j,k,ispec)
-                c66 = c66store(i,j,k,ispec)
-
-                sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
-                          c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
-                sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
-                          c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
-                sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
-                          c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
-                sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
-                          c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
-                sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
-                          c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
-                sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
-                          c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
-
-              else
-
-  ! isotropic case
-                lambdalplus2mul = kappal + FOUR_THIRDS * mul
-                lambdal = lambdalplus2mul - 2.*mul
-
-                ! compute stress sigma
-                sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
-                sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
-                sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
-
-                sigma_xy = mul*duxdyl_plus_duydxl
-                sigma_xz = mul*duzdxl_plus_duxdzl
-                sigma_yz = mul*duzdyl_plus_duydzl
-
-              endif ! ANISOTROPY
-
-              ! subtract memory variables if attenuation
-              if(ATTENUATION) then
-! way 1
-!                do i_sls = 1,N_SLS
-!                  R_xx_val = R_xx(i,j,k,ispec,i_sls)
-!                  R_yy_val = R_yy(i,j,k,ispec,i_sls)
-!                  sigma_xx = sigma_xx - R_xx_val
-!                  sigma_yy = sigma_yy - R_yy_val
-!                  sigma_zz = sigma_zz + R_xx_val + R_yy_val
-!                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-!                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-!                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-!                enddo
-
-! way 2
-! note: this should help compilers to pipeline the code and make better use of the cache;
-!          depending on compilers, it can further decrease the computation time by ~ 30%.
-!          by default, N_SLS = 3, therefore we take steps of 3
-              if(imodulo_N_SLS >= 1) then
-                do i_sls = 1,imodulo_N_SLS
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1
-                  sigma_yy = sigma_yy - R_yy_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-                enddo
-              endif
-
-              if(N_SLS >= imodulo_N_SLS+1) then
-                do i_sls = imodulo_N_SLS+1,N_SLS,3
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1
-                  sigma_yy = sigma_yy - R_yy_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-
-                  R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
-                  R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
-                  sigma_xx = sigma_xx - R_xx_val2
-                  sigma_yy = sigma_yy - R_yy_val2
-                  sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
-
-                  R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
-                  R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
-                  sigma_xx = sigma_xx - R_xx_val3
-                  sigma_yy = sigma_yy - R_yy_val3
-                  sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
-                enddo
-              endif
-
-
-              endif
-
-            ! define symmetric components of sigma
-            sigma_yx = sigma_xy
-            sigma_zx = sigma_xz
-            sigma_zy = sigma_yz
-
-            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
-            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
-            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
-            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
-            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
-            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
-            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
-            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
-            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
-            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
-            enddo
-          enddo
-        enddo
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_6points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
-        do j=1,m2
-          do i=1,m1
-            E1_m1_m2_6points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_6points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C1_m1_m2_6points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C1_m1_m2_6points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C1_m1_m2_6points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C1_m1_m2_6points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C1_m1_m2_6points(6,j)
-            E2_m1_m2_6points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_6points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C2_m1_m2_6points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C2_m1_m2_6points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C2_m1_m2_6points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C2_m1_m2_6points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C2_m1_m2_6points(6,j)
-            E3_m1_m2_6points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_6points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C3_m1_m2_6points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C3_m1_m2_6points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C3_m1_m2_6points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C3_m1_m2_6points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C3_m1_m2_6points(6,j)
-          enddo
-        enddo
-
-        !   call mxm_m1_m1_6points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
-        !         hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
-        do i=1,m1
-          do j=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempx2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempx2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempx2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempx2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempx2(i,6,k)*hprimewgll_xx(6,j)
-              newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempy2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempy2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempy2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempy2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempy2(i,6,k)*hprimewgll_xx(6,j)
-              newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempz2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempz2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempz2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempz2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempz2(i,6,k)*hprimewgll_xx(6,j)
-            enddo
-          enddo
-        enddo
-
-        ! call mxm_m2_m1_6points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
-        do j=1,m1
-          do i=1,m2
-            E1_mxm_m2_m1_6points(i,j) = C1_mxm_m2_m1_6points(i,1)*hprimewgll_xx(1,j) + &
-                                      C1_mxm_m2_m1_6points(i,2)*hprimewgll_xx(2,j) + &
-                                      C1_mxm_m2_m1_6points(i,3)*hprimewgll_xx(3,j) + &
-                                      C1_mxm_m2_m1_6points(i,4)*hprimewgll_xx(4,j) + &
-                                      C1_mxm_m2_m1_6points(i,5)*hprimewgll_xx(5,j) + &
-                                      C1_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
-            E2_mxm_m2_m1_6points(i,j) = C2_mxm_m2_m1_6points(i,1)*hprimewgll_xx(1,j) + &
-                                      C2_mxm_m2_m1_6points(i,2)*hprimewgll_xx(2,j) + &
-                                      C2_mxm_m2_m1_6points(i,3)*hprimewgll_xx(3,j) + &
-                                      C2_mxm_m2_m1_6points(i,4)*hprimewgll_xx(4,j) + &
-                                      C2_mxm_m2_m1_6points(i,5)*hprimewgll_xx(5,j) + &
-                                      C2_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
-            E3_mxm_m2_m1_6points(i,j) = C3_mxm_m2_m1_6points(i,1)*hprimewgll_xx(1,j) + &
-                                      C3_mxm_m2_m1_6points(i,2)*hprimewgll_xx(2,j) + &
-                                      C3_mxm_m2_m1_6points(i,3)*hprimewgll_xx(3,j) + &
-                                      C3_mxm_m2_m1_6points(i,4)*hprimewgll_xx(4,j) + &
-                                      C3_mxm_m2_m1_6points(i,5)*hprimewgll_xx(5,j) + &
-                                      C3_mxm_m2_m1_6points(i,6)*hprimewgll_xx(6,j)
-          enddo
-        enddo
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-
-              fac1 = wgllwgll_yz(j,k)
-              fac2 = wgllwgll_xz(i,k)
-              fac3 = wgllwgll_xy(i,j)
-
-              ! sum contributions from each element to the global mesh using indirect addressing
-              iglob = ibool(i,j,k,ispec)
-              accel(1,iglob) = accel(1,iglob) - fac1*newtempx1(i,j,k) - &
-                                fac2*newtempx2(i,j,k) - fac3*newtempx3(i,j,k)
-              accel(2,iglob) = accel(2,iglob) - fac1*newtempy1(i,j,k) - &
-                                fac2*newtempy2(i,j,k) - fac3*newtempy3(i,j,k)
-              accel(3,iglob) = accel(3,iglob) - fac1*newtempz1(i,j,k) - &
-                                fac2*newtempz2(i,j,k) - fac3*newtempz3(i,j,k)
-
-              !  update memory variables based upon the Runge-Kutta scheme
-              if(ATTENUATION) then
-
-                 ! use Runge-Kutta scheme to march in time
-                 do i_sls = 1,N_SLS
-
-                    factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
-
-                    alphaval_loc = alphaval(i_sls)
-                    betaval_loc = betaval(i_sls)
-                    gammaval_loc = gammaval(i_sls)
-
-                    ! term in xx
-                    Sn   = factor_loc * epsilondev_xx(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xx_loc(i,j,k)
-                    R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yy
-                    Sn   = factor_loc * epsilondev_yy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yy_loc(i,j,k)
-                    R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in zz not computed since zero trace
-                    ! term in xy
-                    Sn   = factor_loc * epsilondev_xy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xy_loc(i,j,k)
-                    R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in xz
-                    Sn   = factor_loc * epsilondev_xz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xz_loc(i,j,k)
-                    R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yz
-                    Sn   = factor_loc * epsilondev_yz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yz_loc(i,j,k)
-                    R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-
-                 enddo   ! end of loop on memory variables
-
-              endif  !  end attenuation
-
-            enddo
-          enddo
-        enddo
-
-        ! save deviatoric strain for Runge-Kutta scheme
-        if ( COMPUTE_AND_STORE_STRAIN ) then
-          epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
-          epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
-          epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
-          epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
-          epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
-        endif
-
-  enddo  ! spectral element loop
-
-end subroutine compute_forces_elastic_Dev_6p
-
-!
-!=====================================================================
-!
-
-subroutine compute_forces_elastic_Dev_7p( iphase ,NSPEC_AB,NGLOB_AB, &
-                                    displ,veloc,accel, &
-                                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                                    hprime_xx,hprime_xxT, &
-                                    hprimewgll_xx,hprimewgll_xxT, &
-                                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                                    kappastore,mustore,jacobian,ibool, &
-                                    ATTENUATION,deltat, &
-                                    one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
-                                    NSPEC_ATTENUATION_AB, &
-                                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                                    ANISOTROPY,NSPEC_ANISO, &
-                                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                                    c55store,c56store,c66store, &
-                                    SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
-                                    is_moho_top,is_moho_bot, &
-                                    dsdx_top,dsdx_bot, &
-                                    ispec2D_moho_top,ispec2D_moho_bot, &
-                                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                                    phase_ispec_inner_elastic)
-
-
-! computes elastic tensor term
-
-  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
-                      N_SLS,SAVE_MOHO_MESH, &
-                      ONE_THIRD,FOUR_THIRDS,m1,m2
-  implicit none
-
-  integer :: NSPEC_AB,NGLOB_AB
-
-! displacement, velocity and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,veloc,accel
-
-! time step
-  real(kind=CUSTOM_REAL) :: deltat
-
-! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        kappastore,mustore,jacobian
-
-! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,7) :: hprime_xx,hprimewgll_xxT
-  real(kind=CUSTOM_REAL), dimension(7,NGLLX) :: hprime_xxT,hprimewgll_xx
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! memory variables and standard linear solids for attenuation
-  logical :: ATTENUATION
-  logical :: COMPUTE_AND_STORE_STRAIN
-  integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
-  integer :: NSPEC_ATTENUATION_AB
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
-  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
-      R_xx,R_yy,R_xy,R_xz,R_yz
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
-       epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
-  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
-
-! anisotropy
-  logical :: ANISOTROPY
-  integer :: NSPEC_ANISO
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
-            c11store,c12store,c13store,c14store,c15store,c16store, &
-            c22store,c23store,c24store,c25store,c26store,c33store, &
-            c34store,c35store,c36store,c44store,c45store,c46store, &
-            c55store,c56store,c66store
-
-  integer :: iphase
-  integer :: num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic
-  integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
-
-! adjoint simulations
-  integer :: SIMULATION_TYPE
-  integer :: NSPEC_BOUN,NSPEC2D_MOHO
-
-  ! moho kernel
-  real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
-    dsdx_top,dsdx_bot
-  logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
-  integer :: ispec2D_moho_top, ispec2D_moho_bot
-
-! local parameters
-  real(kind=CUSTOM_REAL), dimension(7,7,7) :: dummyx_loc,dummyy_loc,dummyz_loc, &
-    newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(7,7,7) :: &
-    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-
-  ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(7,49) :: B1_m1_m2_7points,B2_m1_m2_7points,B3_m1_m2_7points
-  real(kind=CUSTOM_REAL), dimension(7,49) :: C1_m1_m2_7points,C2_m1_m2_7points,C3_m1_m2_7points
-  real(kind=CUSTOM_REAL), dimension(7,49) :: E1_m1_m2_7points,E2_m1_m2_7points,E3_m1_m2_7points
-
-  equivalence(dummyx_loc,B1_m1_m2_7points)
-  equivalence(dummyy_loc,B2_m1_m2_7points)
-  equivalence(dummyz_loc,B3_m1_m2_7points)
-  equivalence(tempx1,C1_m1_m2_7points)
-  equivalence(tempy1,C2_m1_m2_7points)
-  equivalence(tempz1,C3_m1_m2_7points)
-  equivalence(newtempx1,E1_m1_m2_7points)
-  equivalence(newtempy1,E2_m1_m2_7points)
-  equivalence(newtempz1,E3_m1_m2_7points)
-
-  real(kind=CUSTOM_REAL), dimension(7,7,7) :: &
-    tempx1_att,tempx2_att,tempx3_att,tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
-  real(kind=CUSTOM_REAL), dimension(7,7,7) :: dummyx_loc_att,dummyy_loc_att,dummyz_loc_att
-  real(kind=CUSTOM_REAL), dimension(7,49) :: B1_m1_m2_7points_att,B2_m1_m2_7points_att,B3_m1_m2_7points_att
-  real(kind=CUSTOM_REAL), dimension(7,49) :: C1_m1_m2_7points_att,C2_m1_m2_7points_att,C3_m1_m2_7points_att
-
-  equivalence(dummyx_loc_att,B1_m1_m2_7points_att)
-  equivalence(dummyy_loc_att,B2_m1_m2_7points_att)
-  equivalence(dummyz_loc_att,B3_m1_m2_7points_att)
-  equivalence(tempx1_att,C1_m1_m2_7points_att)
-  equivalence(tempy1_att,C2_m1_m2_7points_att)
-  equivalence(tempz1_att,C3_m1_m2_7points_att)
-
-  real(kind=CUSTOM_REAL), dimension(49,7) :: &
-    A1_mxm_m2_m1_7points,A2_mxm_m2_m1_7points,A3_mxm_m2_m1_7points
-  real(kind=CUSTOM_REAL), dimension(49,7) :: &
-    C1_mxm_m2_m1_7points,C2_mxm_m2_m1_7points,C3_mxm_m2_m1_7points
-  real(kind=CUSTOM_REAL), dimension(49,7) :: &
-    E1_mxm_m2_m1_7points,E2_mxm_m2_m1_7points,E3_mxm_m2_m1_7points
-
-  equivalence(dummyx_loc,A1_mxm_m2_m1_7points)
-  equivalence(dummyy_loc,A2_mxm_m2_m1_7points)
-  equivalence(dummyz_loc,A3_mxm_m2_m1_7points)
-  equivalence(tempx3,C1_mxm_m2_m1_7points)
-  equivalence(tempy3,C2_mxm_m2_m1_7points)
-  equivalence(tempz3,C3_mxm_m2_m1_7points)
-  equivalence(newtempx3,E1_mxm_m2_m1_7points)
-  equivalence(newtempy3,E2_mxm_m2_m1_7points)
-  equivalence(newtempz3,E3_mxm_m2_m1_7points)
-
-  real(kind=CUSTOM_REAL), dimension(49,7) :: &
-    A1_mxm_m2_m1_7points_att,A2_mxm_m2_m1_7points_att,A3_mxm_m2_m1_7points_att
-  real(kind=CUSTOM_REAL), dimension(49,7) :: &
-    C1_mxm_m2_m1_7points_att,C2_mxm_m2_m1_7points_att,C3_mxm_m2_m1_7points_att
-
-  equivalence(dummyx_loc_att,A1_mxm_m2_m1_7points_att)
-  equivalence(dummyy_loc_att,A2_mxm_m2_m1_7points_att)
-  equivalence(dummyz_loc_att,A3_mxm_m2_m1_7points_att)
-  equivalence(tempx3_att,C1_mxm_m2_m1_7points_att)
-  equivalence(tempy3_att,C2_mxm_m2_m1_7points_att)
-  equivalence(tempz3_att,C3_mxm_m2_m1_7points_att)
-
-  ! local attenuation parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_xx_loc, &
-       epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
-  real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
-  real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
-  real(kind=CUSTOM_REAL) Sn,Snp1
-  real(kind=CUSTOM_REAL) templ
-
-  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
-
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-
-  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
-
-  real(kind=CUSTOM_REAL) fac1,fac2,fac3
-
-  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
-  real(kind=CUSTOM_REAL) kappal
-
-  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
-  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
-
-  ! local anisotropy parameters
-  real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
-                        c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
-
-  integer i_SLS,imodulo_N_SLS
-  integer ispec,iglob,ispec_p,num_elements
-  integer i,j,k
-
-  imodulo_N_SLS = mod(N_SLS,3)
-
-  ! choses inner/outer elements
-  if( iphase == 1 ) then
-    num_elements = nspec_outer_elastic
-  else
-    num_elements = nspec_inner_elastic
-  endif
-
-  do ispec_p = 1,num_elements
-
-        ! returns element id from stored element list
-        ispec = phase_ispec_inner_elastic(ispec_p,iphase)
-
-        ! adjoint simulations: moho kernel
-        if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
-          if (is_moho_top(ispec)) then
-            ispec2D_moho_top = ispec2D_moho_top + 1
-          else if (is_moho_bot(ispec)) then
-            ispec2D_moho_bot = ispec2D_moho_bot + 1
-          endif
-        endif ! adjoint
-
-        ! stores displacment values in local array
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc(i,j,k) = displ(1,iglob)
-                dummyy_loc(i,j,k) = displ(2,iglob)
-                dummyz_loc(i,j,k) = displ(3,iglob)
-            enddo
-          enddo
-        enddo
-
-        ! use first order Taylor expansion of displacement for local storage of stresses 
-        ! at this current time step, to fix attenuation in a consistent way
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           do k=1,NGLLZ
-              do j=1,NGLLY
-                 do i=1,NGLLX
-                    iglob = ibool(i,j,k,ispec)
-                    dummyx_loc_att(i,j,k) = deltat*veloc(1,iglob)
-                    dummyy_loc_att(i,j,k) = deltat*veloc(2,iglob)
-                    dummyz_loc_att(i,j,k) = deltat*veloc(3,iglob)
-                 enddo
-              enddo
-           enddo
-        endif
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_7points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
-        do j=1,m2
-          do i=1,m1
-            C1_m1_m2_7points(i,j) = hprime_xx(i,1)*B1_m1_m2_7points(1,j) + &
-                                  hprime_xx(i,2)*B1_m1_m2_7points(2,j) + &
-                                  hprime_xx(i,3)*B1_m1_m2_7points(3,j) + &
-                                  hprime_xx(i,4)*B1_m1_m2_7points(4,j) + &
-                                  hprime_xx(i,5)*B1_m1_m2_7points(5,j) + &
-                                  hprime_xx(i,6)*B1_m1_m2_7points(6,j) + &
-                                  hprime_xx(i,7)*B1_m1_m2_7points(7,j)
-            C2_m1_m2_7points(i,j) = hprime_xx(i,1)*B2_m1_m2_7points(1,j) + &
-                                  hprime_xx(i,2)*B2_m1_m2_7points(2,j) + &
-                                  hprime_xx(i,3)*B2_m1_m2_7points(3,j) + &
-                                  hprime_xx(i,4)*B2_m1_m2_7points(4,j) + &
-                                  hprime_xx(i,5)*B2_m1_m2_7points(5,j) + &
-                                  hprime_xx(i,6)*B2_m1_m2_7points(6,j) + &
-                                  hprime_xx(i,7)*B2_m1_m2_7points(7,j)
-            C3_m1_m2_7points(i,j) = hprime_xx(i,1)*B3_m1_m2_7points(1,j) + &
-                                  hprime_xx(i,2)*B3_m1_m2_7points(2,j) + &
-                                  hprime_xx(i,3)*B3_m1_m2_7points(3,j) + &
-                                  hprime_xx(i,4)*B3_m1_m2_7points(4,j) + &
-                                  hprime_xx(i,5)*B3_m1_m2_7points(5,j) + &
-                                  hprime_xx(i,6)*B3_m1_m2_7points(6,j) + &
-                                  hprime_xx(i,7)*B3_m1_m2_7points(7,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m2
-              do i=1,m1
-                 C1_m1_m2_7points_att(i,j) = C1_m1_m2_7points(i,j) + & 
-                      hprime_xx(i,1)*B1_m1_m2_7points_att(1,j) + &
-                      hprime_xx(i,2)*B1_m1_m2_7points_att(2,j) + &
-                      hprime_xx(i,3)*B1_m1_m2_7points_att(3,j) + &
-                      hprime_xx(i,4)*B1_m1_m2_7points_att(4,j) + &
-                      hprime_xx(i,5)*B1_m1_m2_7points_att(5,j) + &
-                      hprime_xx(i,6)*B1_m1_m2_7points_att(6,j) + &
-                      hprime_xx(i,7)*B1_m1_m2_7points_att(7,j)
-
-                 C2_m1_m2_7points_att(i,j) = C2_m1_m2_7points(i,j) + &
-                      hprime_xx(i,1)*B2_m1_m2_7points_att(1,j) + &
-                      hprime_xx(i,2)*B2_m1_m2_7points_att(2,j) + &
-                      hprime_xx(i,3)*B2_m1_m2_7points_att(3,j) + &
-                      hprime_xx(i,4)*B2_m1_m2_7points_att(4,j) + &
-                      hprime_xx(i,5)*B2_m1_m2_7points_att(5,j) + &
-                      hprime_xx(i,6)*B2_m1_m2_7points_att(6,j) + &
-                      hprime_xx(i,7)*B2_m1_m2_7points_att(7,j)
-
-                 C3_m1_m2_7points_att(i,j) = C3_m1_m2_7points(i,j) + &
-                      hprime_xx(i,1)*B3_m1_m2_7points_att(1,j) + &
-                      hprime_xx(i,2)*B3_m1_m2_7points_att(2,j) + &
-                      hprime_xx(i,3)*B3_m1_m2_7points_att(3,j) + &
-                      hprime_xx(i,4)*B3_m1_m2_7points_att(4,j) + &
-                      hprime_xx(i,5)*B3_m1_m2_7points_att(5,j) + &
-                      hprime_xx(i,6)*B3_m1_m2_7points_att(6,j) + &
-                      hprime_xx(i,7)*B3_m1_m2_7points_att(7,j)
-              enddo
-           enddo
-        endif
-
-        !   call mxm_m1_m1_7points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
-        !          hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
-        do j=1,m1
-          do i=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyx_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyx_loc(i,7,k)*hprime_xxT(7,j)
-              tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyy_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyy_loc(i,7,k)*hprime_xxT(7,j)
-              tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyz_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyz_loc(i,7,k)*hprime_xxT(7,j)
-            enddo
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m1
-                 ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-                 do k = 1,NGLLX
-                    tempx2_att(i,j,k) = tempx2(i,j,k) + &
-                         dummyx_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyx_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyx_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyx_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyx_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyx_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyx_loc_att(i,7,k)*hprime_xxT(7,j)
-
-                    tempy2_att(i,j,k) = tempy2(i,j,k) + &
-                         dummyy_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyy_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyy_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyy_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyy_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyy_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyy_loc_att(i,7,k)*hprime_xxT(7,j)
-
-                    tempz2_att(i,j,k) = tempz2(i,j,k) + &
-                         dummyz_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyz_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyz_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyz_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyz_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyz_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyz_loc_att(i,7,k)*hprime_xxT(7,j)
-                 enddo
-              enddo
-           enddo
-        endif
-
-        ! call mxm_m2_m1_7points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
-        do j=1,m1
-          do i=1,m2
-            C1_mxm_m2_m1_7points(i,j) = A1_mxm_m2_m1_7points(i,1)*hprime_xxT(1,j) + &
-                                      A1_mxm_m2_m1_7points(i,2)*hprime_xxT(2,j) + &
-                                      A1_mxm_m2_m1_7points(i,3)*hprime_xxT(3,j) + &
-                                      A1_mxm_m2_m1_7points(i,4)*hprime_xxT(4,j) + &
-                                      A1_mxm_m2_m1_7points(i,5)*hprime_xxT(5,j) + &
-                                      A1_mxm_m2_m1_7points(i,6)*hprime_xxT(6,j) + &
-                                      A1_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
-            C2_mxm_m2_m1_7points(i,j) = A2_mxm_m2_m1_7points(i,1)*hprime_xxT(1,j) + &
-                                      A2_mxm_m2_m1_7points(i,2)*hprime_xxT(2,j) + &
-                                      A2_mxm_m2_m1_7points(i,3)*hprime_xxT(3,j) + &
-                                      A2_mxm_m2_m1_7points(i,4)*hprime_xxT(4,j) + &
-                                      A2_mxm_m2_m1_7points(i,5)*hprime_xxT(5,j) + &
-                                      A2_mxm_m2_m1_7points(i,6)*hprime_xxT(6,j) + &
-                                      A2_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
-            C3_mxm_m2_m1_7points(i,j) = A3_mxm_m2_m1_7points(i,1)*hprime_xxT(1,j) + &
-                                      A3_mxm_m2_m1_7points(i,2)*hprime_xxT(2,j) + &
-                                      A3_mxm_m2_m1_7points(i,3)*hprime_xxT(3,j) + &
-                                      A3_mxm_m2_m1_7points(i,4)*hprime_xxT(4,j) + &
-                                      A3_mxm_m2_m1_7points(i,5)*hprime_xxT(5,j) + &
-                                      A3_mxm_m2_m1_7points(i,6)*hprime_xxT(6,j) + &
-                                      A3_mxm_m2_m1_7points(i,7)*hprime_xxT(7,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m2
-                 C1_mxm_m2_m1_7points_att(i,j) = C1_mxm_m2_m1_7points(i,j) + &
-                      A1_mxm_m2_m1_7points_att(i,1)*hprime_xxT(1,j) + &
-                      A1_mxm_m2_m1_7points_att(i,2)*hprime_xxT(2,j) + &
-                      A1_mxm_m2_m1_7points_att(i,3)*hprime_xxT(3,j) + &
-                      A1_mxm_m2_m1_7points_att(i,4)*hprime_xxT(4,j) + &
-                      A1_mxm_m2_m1_7points_att(i,5)*hprime_xxT(5,j) + &
-                      A1_mxm_m2_m1_7points_att(i,6)*hprime_xxT(6,j) + &
-                      A1_mxm_m2_m1_7points_att(i,7)*hprime_xxT(7,j)
-
-                 C2_mxm_m2_m1_7points_att(i,j) = C2_mxm_m2_m1_7points(i,j) + &
-                      A2_mxm_m2_m1_7points_att(i,1)*hprime_xxT(1,j) + &
-                      A2_mxm_m2_m1_7points_att(i,2)*hprime_xxT(2,j) + &
-                      A2_mxm_m2_m1_7points_att(i,3)*hprime_xxT(3,j) + &
-                      A2_mxm_m2_m1_7points_att(i,4)*hprime_xxT(4,j) + &
-                      A2_mxm_m2_m1_7points_att(i,5)*hprime_xxT(5,j) + &
-                      A2_mxm_m2_m1_7points_att(i,6)*hprime_xxT(6,j) + &
-                      A2_mxm_m2_m1_7points_att(i,7)*hprime_xxT(7,j)
-
-                 C3_mxm_m2_m1_7points_att(i,j) = C3_mxm_m2_m1_7points(i,j) + &
-                      A3_mxm_m2_m1_7points_att(i,1)*hprime_xxT(1,j) + &
-                      A3_mxm_m2_m1_7points_att(i,2)*hprime_xxT(2,j) + &
-                      A3_mxm_m2_m1_7points_att(i,3)*hprime_xxT(3,j) + &
-                      A3_mxm_m2_m1_7points_att(i,4)*hprime_xxT(4,j) + &
-                      A3_mxm_m2_m1_7points_att(i,5)*hprime_xxT(5,j) + &
-                      A3_mxm_m2_m1_7points_att(i,6)*hprime_xxT(6,j) + &
-                      A3_mxm_m2_m1_7points_att(i,7)*hprime_xxT(7,j)
-              enddo
-           enddo
-        endif
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-              ! get derivatives of ux, uy and uz with respect to x, y and z
-              xixl = xix(i,j,k,ispec)
-              xiyl = xiy(i,j,k,ispec)
-              xizl = xiz(i,j,k,ispec)
-              etaxl = etax(i,j,k,ispec)
-              etayl = etay(i,j,k,ispec)
-              etazl = etaz(i,j,k,ispec)
-              gammaxl = gammax(i,j,k,ispec)
-              gammayl = gammay(i,j,k,ispec)
-              gammazl = gammaz(i,j,k,ispec)
-              jacobianl = jacobian(i,j,k,ispec)
-
-              duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
-              duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
-              duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
-              duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
-              duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
-              duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
-              duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
-              duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
-              duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
-              ! save strain on the Moho boundary
-              if (SAVE_MOHO_MESH ) then
-                if (is_moho_top(ispec)) then
-                  dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
-                  dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
-                  dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
-                  dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
-                  dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
-                  dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
-                  dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
-                  dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
-                  dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
-                else if (is_moho_bot(ispec)) then
-                  dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
-                  dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
-                  dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
-                  dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
-                  dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
-                  dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
-                  dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
-                  dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
-                  dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
-                endif
-              endif
-
-              ! precompute some sums to save CPU time
-              duxdxl_plus_duydyl = duxdxl + duydyl
-              duxdxl_plus_duzdzl = duxdxl + duzdzl
-              duydyl_plus_duzdzl = duydyl + duzdzl
-              duxdyl_plus_duydxl = duxdyl + duydxl
-              duzdxl_plus_duxdzl = duzdxl + duxdzl
-              duzdyl_plus_duydzl = duzdyl + duydzl
-
-              if ( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
-                 ! temporary variables used for fixing attenuation in a consistent way
-                 duxdxl_att = xixl*tempx1_att(i,j,k) + etaxl*tempx2_att(i,j,k) + gammaxl*tempx3_att(i,j,k)
-                 duxdyl_att = xiyl*tempx1_att(i,j,k) + etayl*tempx2_att(i,j,k) + gammayl*tempx3_att(i,j,k)
-                 duxdzl_att = xizl*tempx1_att(i,j,k) + etazl*tempx2_att(i,j,k) + gammazl*tempx3_att(i,j,k)
-
-                 duydxl_att = xixl*tempy1_att(i,j,k) + etaxl*tempy2_att(i,j,k) + gammaxl*tempy3_att(i,j,k)
-                 duydyl_att = xiyl*tempy1_att(i,j,k) + etayl*tempy2_att(i,j,k) + gammayl*tempy3_att(i,j,k)
-                 duydzl_att = xizl*tempy1_att(i,j,k) + etazl*tempy2_att(i,j,k) + gammazl*tempy3_att(i,j,k)
-
-                 duzdxl_att = xixl*tempz1_att(i,j,k) + etaxl*tempz2_att(i,j,k) + gammaxl*tempz3_att(i,j,k)
-                 duzdyl_att = xiyl*tempz1_att(i,j,k) + etayl*tempz2_att(i,j,k) + gammayl*tempz3_att(i,j,k)
-                 duzdzl_att = xizl*tempz1_att(i,j,k) + etazl*tempz2_att(i,j,k) + gammazl*tempz3_att(i,j,k)
-
-                 ! precompute some sums to save CPU time
-                 duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
-                 duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
-                 duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
-
-                 ! compute deviatoric strain
-                 templ = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
-                 if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                 epsilondev_xx_loc(i,j,k) = duxdxl_att - templ
-                 epsilondev_yy_loc(i,j,k) = duydyl_att - templ
-                 epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
-                 epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
-                 epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
-              else
-                 ! computes deviatoric strain attenuation and/or for kernel calculations
-                 if (COMPUTE_AND_STORE_STRAIN) then
-                    templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-                    if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                    epsilondev_xx_loc(i,j,k) = duxdxl - templ
-                    epsilondev_yy_loc(i,j,k) = duydyl - templ
-                    epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
-                    epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
-                    epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
-                 endif
-              endif
-
-              kappal = kappastore(i,j,k,ispec)
-              mul = mustore(i,j,k,ispec)
-
-              ! attenuation
-              if(ATTENUATION) then
-                ! use unrelaxed parameters if attenuation
-                mul  = mul * one_minus_sum_beta(i,j,k,ispec)
-              endif
-
-  ! full anisotropic case, stress calculations
-              if(ANISOTROPY) then
-                c11 = c11store(i,j,k,ispec)
-                c12 = c12store(i,j,k,ispec)
-                c13 = c13store(i,j,k,ispec)
-                c14 = c14store(i,j,k,ispec)
-                c15 = c15store(i,j,k,ispec)
-                c16 = c16store(i,j,k,ispec)
-                c22 = c22store(i,j,k,ispec)
-                c23 = c23store(i,j,k,ispec)
-                c24 = c24store(i,j,k,ispec)
-                c25 = c25store(i,j,k,ispec)
-                c26 = c26store(i,j,k,ispec)
-                c33 = c33store(i,j,k,ispec)
-                c34 = c34store(i,j,k,ispec)
-                c35 = c35store(i,j,k,ispec)
-                c36 = c36store(i,j,k,ispec)
-                c44 = c44store(i,j,k,ispec)
-                c45 = c45store(i,j,k,ispec)
-                c46 = c46store(i,j,k,ispec)
-                c55 = c55store(i,j,k,ispec)
-                c56 = c56store(i,j,k,ispec)
-                c66 = c66store(i,j,k,ispec)
-
-                sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
-                          c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
-                sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
-                          c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
-                sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
-                          c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
-                sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
-                          c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
-                sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
-                          c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
-                sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
-                          c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
-
-              else
-
-  ! isotropic case
-                lambdalplus2mul = kappal + FOUR_THIRDS * mul
-                lambdal = lambdalplus2mul - 2.*mul
-
-                ! compute stress sigma
-                sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
-                sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
-                sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
-
-                sigma_xy = mul*duxdyl_plus_duydxl
-                sigma_xz = mul*duzdxl_plus_duxdzl
-                sigma_yz = mul*duzdyl_plus_duydzl
-
-              endif ! ANISOTROPY
-
-              ! subtract memory variables if attenuation
-              if(ATTENUATION) then
-! way 1
-!                do i_sls = 1,N_SLS
-!                  R_xx_val = R_xx(i,j,k,ispec,i_sls)
-!                  R_yy_val = R_yy(i,j,k,ispec,i_sls)
-!                  sigma_xx = sigma_xx - R_xx_val
-!                  sigma_yy = sigma_yy - R_yy_val
-!                  sigma_zz = sigma_zz + R_xx_val + R_yy_val
-!                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-!                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-!                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-!                enddo
-
-! way 2
-! note: this should help compilers to pipeline the code and make better use of the cache;
-!          depending on compilers, it can further decrease the computation time by ~ 30%.
-!          by default, N_SLS = 3, therefore we take steps of 3
-              if(imodulo_N_SLS >= 1) then
-                do i_sls = 1,imodulo_N_SLS
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1
-                  sigma_yy = sigma_yy - R_yy_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-                enddo
-              endif
-
-              if(N_SLS >= imodulo_N_SLS+1) then
-                do i_sls = imodulo_N_SLS+1,N_SLS,3
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1
-                  sigma_yy = sigma_yy - R_yy_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-
-                  R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
-                  R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
-                  sigma_xx = sigma_xx - R_xx_val2
-                  sigma_yy = sigma_yy - R_yy_val2
-                  sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
-
-                  R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
-                  R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
-                  sigma_xx = sigma_xx - R_xx_val3
-                  sigma_yy = sigma_yy - R_yy_val3
-                  sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
-                enddo
-              endif
-
-
-              endif
-
-            ! define symmetric components of sigma
-            sigma_yx = sigma_xy
-            sigma_zx = sigma_xz
-            sigma_zy = sigma_yz
-
-            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
-            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
-            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
-            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
-            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
-            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
-            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
-            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
-            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
-            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
-            enddo
-          enddo
-        enddo
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_7points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
-        do j=1,m2
-          do i=1,m1
-            E1_m1_m2_7points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_7points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C1_m1_m2_7points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C1_m1_m2_7points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C1_m1_m2_7points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C1_m1_m2_7points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C1_m1_m2_7points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C1_m1_m2_7points(7,j)
-            E2_m1_m2_7points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_7points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C2_m1_m2_7points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C2_m1_m2_7points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C2_m1_m2_7points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C2_m1_m2_7points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C2_m1_m2_7points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C2_m1_m2_7points(7,j)
-            E3_m1_m2_7points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_7points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C3_m1_m2_7points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C3_m1_m2_7points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C3_m1_m2_7points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C3_m1_m2_7points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C3_m1_m2_7points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C3_m1_m2_7points(7,j)
-          enddo
-        enddo
-
-        !   call mxm_m1_m1_7points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
-        !         hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
-        do i=1,m1
-          do j=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempx2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempx2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempx2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempx2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempx2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempx2(i,7,k)*hprimewgll_xx(7,j)
-              newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempy2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempy2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempy2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempy2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempy2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempy2(i,7,k)*hprimewgll_xx(7,j)
-              newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempz2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempz2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempz2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempz2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempz2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempz2(i,7,k)*hprimewgll_xx(7,j)
-            enddo
-          enddo
-        enddo
-
-        ! call mxm_m2_m1_7points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
-        do j=1,m1
-          do i=1,m2
-            E1_mxm_m2_m1_7points(i,j) = C1_mxm_m2_m1_7points(i,1)*hprimewgll_xx(1,j) + &
-                                      C1_mxm_m2_m1_7points(i,2)*hprimewgll_xx(2,j) + &
-                                      C1_mxm_m2_m1_7points(i,3)*hprimewgll_xx(3,j) + &
-                                      C1_mxm_m2_m1_7points(i,4)*hprimewgll_xx(4,j) + &
-                                      C1_mxm_m2_m1_7points(i,5)*hprimewgll_xx(5,j) + &
-                                      C1_mxm_m2_m1_7points(i,6)*hprimewgll_xx(6,j) + &
-                                      C1_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
-            E2_mxm_m2_m1_7points(i,j) = C2_mxm_m2_m1_7points(i,1)*hprimewgll_xx(1,j) + &
-                                      C2_mxm_m2_m1_7points(i,2)*hprimewgll_xx(2,j) + &
-                                      C2_mxm_m2_m1_7points(i,3)*hprimewgll_xx(3,j) + &
-                                      C2_mxm_m2_m1_7points(i,4)*hprimewgll_xx(4,j) + &
-                                      C2_mxm_m2_m1_7points(i,5)*hprimewgll_xx(5,j) + &
-                                      C2_mxm_m2_m1_7points(i,6)*hprimewgll_xx(6,j) + &
-                                      C2_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
-            E3_mxm_m2_m1_7points(i,j) = C3_mxm_m2_m1_7points(i,1)*hprimewgll_xx(1,j) + &
-                                      C3_mxm_m2_m1_7points(i,2)*hprimewgll_xx(2,j) + &
-                                      C3_mxm_m2_m1_7points(i,3)*hprimewgll_xx(3,j) + &
-                                      C3_mxm_m2_m1_7points(i,4)*hprimewgll_xx(4,j) + &
-                                      C3_mxm_m2_m1_7points(i,5)*hprimewgll_xx(5,j) + &
-                                      C3_mxm_m2_m1_7points(i,6)*hprimewgll_xx(6,j) + &
-                                      C3_mxm_m2_m1_7points(i,7)*hprimewgll_xx(7,j)
-          enddo
-        enddo
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-
-              fac1 = wgllwgll_yz(j,k)
-              fac2 = wgllwgll_xz(i,k)
-              fac3 = wgllwgll_xy(i,j)
-
-              ! sum contributions from each element to the global mesh using indirect addressing
-              iglob = ibool(i,j,k,ispec)
-              accel(1,iglob) = accel(1,iglob) - fac1*newtempx1(i,j,k) - &
-                                fac2*newtempx2(i,j,k) - fac3*newtempx3(i,j,k)
-              accel(2,iglob) = accel(2,iglob) - fac1*newtempy1(i,j,k) - &
-                                fac2*newtempy2(i,j,k) - fac3*newtempy3(i,j,k)
-              accel(3,iglob) = accel(3,iglob) - fac1*newtempz1(i,j,k) - &
-                                fac2*newtempz2(i,j,k) - fac3*newtempz3(i,j,k)
-
-              !  update memory variables based upon the Runge-Kutta scheme
-              if(ATTENUATION) then
-
-                 ! use Runge-Kutta scheme to march in time
-                 do i_sls = 1,N_SLS
-
-                    factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
-
-                    alphaval_loc = alphaval(i_sls)
-                    betaval_loc = betaval(i_sls)
-                    gammaval_loc = gammaval(i_sls)
-
-                    ! term in xx
-                    Sn   = factor_loc * epsilondev_xx(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xx_loc(i,j,k)
-                    R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yy
-                    Sn   = factor_loc * epsilondev_yy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yy_loc(i,j,k)
-                    R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in zz not computed since zero trace
-                    ! term in xy
-                    Sn   = factor_loc * epsilondev_xy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xy_loc(i,j,k)
-                    R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in xz
-                    Sn   = factor_loc * epsilondev_xz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xz_loc(i,j,k)
-                    R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yz
-                    Sn   = factor_loc * epsilondev_yz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yz_loc(i,j,k)
-                    R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-
-                 enddo   ! end of loop on memory variables
-
-              endif  !  end attenuation
-
-            enddo
-          enddo
-        enddo
-
-        ! save deviatoric strain for Runge-Kutta scheme
-        if ( COMPUTE_AND_STORE_STRAIN ) then
-          epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
-          epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
-          epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
-          epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
-          epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
-        endif
-
-  enddo  ! spectral element loop
-
-end subroutine compute_forces_elastic_Dev_7p
-
-!
-!=====================================================================
-!
-
-subroutine compute_forces_elastic_Dev_8p( iphase ,NSPEC_AB,NGLOB_AB, &
-                                    displ,veloc,accel, &
-                                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                                    hprime_xx,hprime_xxT, &
-                                    hprimewgll_xx,hprimewgll_xxT, &
-                                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                                    kappastore,mustore,jacobian,ibool, &
-                                    ATTENUATION,deltat, &
-                                    one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
-                                    NSPEC_ATTENUATION_AB, &
-                                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                                    ANISOTROPY,NSPEC_ANISO, &
-                                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                                    c55store,c56store,c66store, &
-                                    SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
-                                    is_moho_top,is_moho_bot, &
-                                    dsdx_top,dsdx_bot, &
-                                    ispec2D_moho_top,ispec2D_moho_bot, &
-                                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                                    phase_ispec_inner_elastic)
-
-
-! computes elastic tensor term
-
-  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
-                      N_SLS,SAVE_MOHO_MESH, &
-                      ONE_THIRD,FOUR_THIRDS,m1,m2
-  implicit none
-
-  integer :: NSPEC_AB,NGLOB_AB
-
-! displacement, velocity and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,veloc,accel
-
-! time step
-  real(kind=CUSTOM_REAL) :: deltat
-
-! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        kappastore,mustore,jacobian
-
-! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,8) :: hprime_xx,hprimewgll_xxT
-  real(kind=CUSTOM_REAL), dimension(8,NGLLX) :: hprime_xxT,hprimewgll_xx
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! memory variables and standard linear solids for attenuation
-  logical :: ATTENUATION
-  logical :: COMPUTE_AND_STORE_STRAIN
-  integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
-  integer :: NSPEC_ATTENUATION_AB
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
-  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
-      R_xx,R_yy,R_xy,R_xz,R_yz
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
-       epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
-  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
-
-! anisotropy
-  logical :: ANISOTROPY
-  integer :: NSPEC_ANISO
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
-            c11store,c12store,c13store,c14store,c15store,c16store, &
-            c22store,c23store,c24store,c25store,c26store,c33store, &
-            c34store,c35store,c36store,c44store,c45store,c46store, &
-            c55store,c56store,c66store
-
-  integer :: iphase
-  integer :: num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic
-  integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
-
-! adjoint simulations
-  integer :: SIMULATION_TYPE
-  integer :: NSPEC_BOUN,NSPEC2D_MOHO
-
-  ! moho kernel
-  real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
-    dsdx_top,dsdx_bot
-  logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
-  integer :: ispec2D_moho_top, ispec2D_moho_bot
-
-! local parameters
-  real(kind=CUSTOM_REAL), dimension(8,8,8) :: dummyx_loc,dummyy_loc,dummyz_loc, &
-    newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(8,8,8) :: &
-    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-
-  ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(8,64) :: B1_m1_m2_8points,B2_m1_m2_8points,B3_m1_m2_8points
-  real(kind=CUSTOM_REAL), dimension(8,64) :: C1_m1_m2_8points,C2_m1_m2_8points,C3_m1_m2_8points
-  real(kind=CUSTOM_REAL), dimension(8,64) :: E1_m1_m2_8points,E2_m1_m2_8points,E3_m1_m2_8points
-
-  equivalence(dummyx_loc,B1_m1_m2_8points)
-  equivalence(dummyy_loc,B2_m1_m2_8points)
-  equivalence(dummyz_loc,B3_m1_m2_8points)
-  equivalence(tempx1,C1_m1_m2_8points)
-  equivalence(tempy1,C2_m1_m2_8points)
-  equivalence(tempz1,C3_m1_m2_8points)
-  equivalence(newtempx1,E1_m1_m2_8points)
-  equivalence(newtempy1,E2_m1_m2_8points)
-  equivalence(newtempz1,E3_m1_m2_8points)
-
-  real(kind=CUSTOM_REAL), dimension(8,8,8) :: &
-    tempx1_att,tempx2_att,tempx3_att,tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
-  real(kind=CUSTOM_REAL), dimension(8,8,8) :: dummyx_loc_att,dummyy_loc_att,dummyz_loc_att
-  real(kind=CUSTOM_REAL), dimension(8,64) :: B1_m1_m2_8points_att,B2_m1_m2_8points_att,B3_m1_m2_8points_att
-  real(kind=CUSTOM_REAL), dimension(8,64) :: C1_m1_m2_8points_att,C2_m1_m2_8points_att,C3_m1_m2_8points_att
-
-  equivalence(dummyx_loc_att,B1_m1_m2_8points_att)
-  equivalence(dummyy_loc_att,B2_m1_m2_8points_att)
-  equivalence(dummyz_loc_att,B3_m1_m2_8points_att)
-  equivalence(tempx1_att,C1_m1_m2_8points_att)
-  equivalence(tempy1_att,C2_m1_m2_8points_att)
-  equivalence(tempz1_att,C3_m1_m2_8points_att)
-
-  real(kind=CUSTOM_REAL), dimension(64,8) :: &
-    A1_mxm_m2_m1_8points,A2_mxm_m2_m1_8points,A3_mxm_m2_m1_8points
-  real(kind=CUSTOM_REAL), dimension(64,8) :: &
-    C1_mxm_m2_m1_8points,C2_mxm_m2_m1_8points,C3_mxm_m2_m1_8points
-  real(kind=CUSTOM_REAL), dimension(64,8) :: &
-    E1_mxm_m2_m1_8points,E2_mxm_m2_m1_8points,E3_mxm_m2_m1_8points
-
-  equivalence(dummyx_loc,A1_mxm_m2_m1_8points)
-  equivalence(dummyy_loc,A2_mxm_m2_m1_8points)
-  equivalence(dummyz_loc,A3_mxm_m2_m1_8points)
-  equivalence(tempx3,C1_mxm_m2_m1_8points)
-  equivalence(tempy3,C2_mxm_m2_m1_8points)
-  equivalence(tempz3,C3_mxm_m2_m1_8points)
-  equivalence(newtempx3,E1_mxm_m2_m1_8points)
-  equivalence(newtempy3,E2_mxm_m2_m1_8points)
-  equivalence(newtempz3,E3_mxm_m2_m1_8points)
-
-  real(kind=CUSTOM_REAL), dimension(64,8) :: &
-    A1_mxm_m2_m1_8points_att,A2_mxm_m2_m1_8points_att,A3_mxm_m2_m1_8points_att
-  real(kind=CUSTOM_REAL), dimension(64,8) :: &
-    C1_mxm_m2_m1_8points_att,C2_mxm_m2_m1_8points_att,C3_mxm_m2_m1_8points_att
-
-  equivalence(dummyx_loc_att,A1_mxm_m2_m1_8points_att)
-  equivalence(dummyy_loc_att,A2_mxm_m2_m1_8points_att)
-  equivalence(dummyz_loc_att,A3_mxm_m2_m1_8points_att)
-  equivalence(tempx3_att,C1_mxm_m2_m1_8points_att)
-  equivalence(tempy3_att,C2_mxm_m2_m1_8points_att)
-  equivalence(tempz3_att,C3_mxm_m2_m1_8points_att)
-
-  ! local attenuation parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_xx_loc, &
-       epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
-  real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
-  real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
-  real(kind=CUSTOM_REAL) Sn,Snp1
-  real(kind=CUSTOM_REAL) templ
-
-  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
-
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-
-  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
-
-  real(kind=CUSTOM_REAL) fac1,fac2,fac3
-
-  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
-  real(kind=CUSTOM_REAL) kappal
-
-  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
-  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
-
-  ! local anisotropy parameters
-  real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
-                        c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
-
-  integer i_SLS,imodulo_N_SLS
-  integer ispec,iglob,ispec_p,num_elements
-  integer i,j,k
-
-  imodulo_N_SLS = mod(N_SLS,3)
-
-  ! choses inner/outer elements
-  if( iphase == 1 ) then
-    num_elements = nspec_outer_elastic
-  else
-    num_elements = nspec_inner_elastic
-  endif
-
-  do ispec_p = 1,num_elements
-
-        ! returns element id from stored element list
-        ispec = phase_ispec_inner_elastic(ispec_p,iphase)
-
-        ! adjoint simulations: moho kernel
-        if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
-          if (is_moho_top(ispec)) then
-            ispec2D_moho_top = ispec2D_moho_top + 1
-          else if (is_moho_bot(ispec)) then
-            ispec2D_moho_bot = ispec2D_moho_bot + 1
-          endif
-        endif ! adjoint
-
-        ! stores displacment values in local array
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc(i,j,k) = displ(1,iglob)
-                dummyy_loc(i,j,k) = displ(2,iglob)
-                dummyz_loc(i,j,k) = displ(3,iglob)
-            enddo
-          enddo
-        enddo
-
-        ! use first order Taylor expansion of displacement for local storage of stresses 
-        ! at this current time step, to fix attenuation in a consistent way
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           do k=1,NGLLZ
-              do j=1,NGLLY
-                 do i=1,NGLLX
-                    iglob = ibool(i,j,k,ispec)
-                    dummyx_loc_att(i,j,k) = deltat*veloc(1,iglob)
-                    dummyy_loc_att(i,j,k) = deltat*veloc(2,iglob)
-                    dummyz_loc_att(i,j,k) = deltat*veloc(3,iglob)
-                 enddo
-              enddo
-           enddo
-        endif
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_8points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
-        do j=1,m2
-          do i=1,m1
-            C1_m1_m2_8points(i,j) = hprime_xx(i,1)*B1_m1_m2_8points(1,j) + &
-                                  hprime_xx(i,2)*B1_m1_m2_8points(2,j) + &
-                                  hprime_xx(i,3)*B1_m1_m2_8points(3,j) + &
-                                  hprime_xx(i,4)*B1_m1_m2_8points(4,j) + &
-                                  hprime_xx(i,5)*B1_m1_m2_8points(5,j) + &
-                                  hprime_xx(i,6)*B1_m1_m2_8points(6,j) + &
-                                  hprime_xx(i,7)*B1_m1_m2_8points(7,j) + &
-                                  hprime_xx(i,8)*B1_m1_m2_8points(8,j)
-            C2_m1_m2_8points(i,j) = hprime_xx(i,1)*B2_m1_m2_8points(1,j) + &
-                                  hprime_xx(i,2)*B2_m1_m2_8points(2,j) + &
-                                  hprime_xx(i,3)*B2_m1_m2_8points(3,j) + &
-                                  hprime_xx(i,4)*B2_m1_m2_8points(4,j) + &
-                                  hprime_xx(i,5)*B2_m1_m2_8points(5,j) + &
-                                  hprime_xx(i,6)*B2_m1_m2_8points(6,j) + &
-                                  hprime_xx(i,7)*B2_m1_m2_8points(7,j) + &
-                                  hprime_xx(i,8)*B2_m1_m2_8points(8,j)
-            C3_m1_m2_8points(i,j) = hprime_xx(i,1)*B3_m1_m2_8points(1,j) + &
-                                  hprime_xx(i,2)*B3_m1_m2_8points(2,j) + &
-                                  hprime_xx(i,3)*B3_m1_m2_8points(3,j) + &
-                                  hprime_xx(i,4)*B3_m1_m2_8points(4,j) + &
-                                  hprime_xx(i,5)*B3_m1_m2_8points(5,j) + &
-                                  hprime_xx(i,6)*B3_m1_m2_8points(6,j) + &
-                                  hprime_xx(i,7)*B3_m1_m2_8points(7,j) + &
-                                  hprime_xx(i,8)*B3_m1_m2_8points(8,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m2
-              do i=1,m1
-                 C1_m1_m2_8points_att(i,j) = C1_m1_m2_8points(i,j) + & 
-                      hprime_xx(i,1)*B1_m1_m2_8points_att(1,j) + &
-                      hprime_xx(i,2)*B1_m1_m2_8points_att(2,j) + &
-                      hprime_xx(i,3)*B1_m1_m2_8points_att(3,j) + &
-                      hprime_xx(i,4)*B1_m1_m2_8points_att(4,j) + &
-                      hprime_xx(i,5)*B1_m1_m2_8points_att(5,j) + &
-                      hprime_xx(i,6)*B1_m1_m2_8points_att(6,j) + &
-                      hprime_xx(i,7)*B1_m1_m2_8points_att(7,j) + &
-                      hprime_xx(i,8)*B1_m1_m2_8points_att(8,j)
-
-                 C2_m1_m2_8points_att(i,j) = C2_m1_m2_8points(i,j) + &
-                      hprime_xx(i,1)*B2_m1_m2_8points_att(1,j) + &
-                      hprime_xx(i,2)*B2_m1_m2_8points_att(2,j) + &
-                      hprime_xx(i,3)*B2_m1_m2_8points_att(3,j) + &
-                      hprime_xx(i,4)*B2_m1_m2_8points_att(4,j) + &
-                      hprime_xx(i,5)*B2_m1_m2_8points_att(5,j) + &
-                      hprime_xx(i,6)*B2_m1_m2_8points_att(6,j) + &
-                      hprime_xx(i,7)*B2_m1_m2_8points_att(7,j) + &
-                      hprime_xx(i,8)*B2_m1_m2_8points_att(8,j)
-
-                 C3_m1_m2_8points_att(i,j) = C3_m1_m2_8points(i,j) + &
-                      hprime_xx(i,1)*B3_m1_m2_8points_att(1,j) + &
-                      hprime_xx(i,2)*B3_m1_m2_8points_att(2,j) + &
-                      hprime_xx(i,3)*B3_m1_m2_8points_att(3,j) + &
-                      hprime_xx(i,4)*B3_m1_m2_8points_att(4,j) + &
-                      hprime_xx(i,5)*B3_m1_m2_8points_att(5,j) + &
-                      hprime_xx(i,6)*B3_m1_m2_8points_att(6,j) + &
-                      hprime_xx(i,7)*B3_m1_m2_8points_att(7,j) + &
-                      hprime_xx(i,8)*B3_m1_m2_8points_att(8,j)
-              enddo
-           enddo
-        endif
-
-        !   call mxm_m1_m1_8points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
-        !          hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
-        do j=1,m1
-          do i=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyx_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyx_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyx_loc(i,8,k)*hprime_xxT(8,j)
-              tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyy_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyy_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyy_loc(i,8,k)*hprime_xxT(8,j)
-              tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyz_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyz_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyz_loc(i,8,k)*hprime_xxT(8,j)
-            enddo
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m1
-                 ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-                 do k = 1,NGLLX
-                    tempx2_att(i,j,k) = tempx2(i,j,k) + &
-                         dummyx_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyx_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyx_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyx_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyx_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyx_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyx_loc_att(i,7,k)*hprime_xxT(7,j) + &
-                         dummyx_loc_att(i,8,k)*hprime_xxT(8,j)
-
-                    tempy2_att(i,j,k) = tempy2(i,j,k) + &
-                         dummyy_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyy_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyy_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyy_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyy_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyy_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyy_loc_att(i,7,k)*hprime_xxT(7,j) + &
-                         dummyy_loc_att(i,8,k)*hprime_xxT(8,j)
-
-                    tempz2_att(i,j,k) = tempz2(i,j,k) + &
-                         dummyz_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyz_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyz_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyz_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyz_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyz_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyz_loc_att(i,7,k)*hprime_xxT(7,j) + &
-                         dummyz_loc_att(i,8,k)*hprime_xxT(8,j)
-                 enddo
-              enddo
-           enddo
-        endif
-
-        ! call mxm_m2_m1_8points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
-        do j=1,m1
-          do i=1,m2
-            C1_mxm_m2_m1_8points(i,j) = A1_mxm_m2_m1_8points(i,1)*hprime_xxT(1,j) + &
-                                      A1_mxm_m2_m1_8points(i,2)*hprime_xxT(2,j) + &
-                                      A1_mxm_m2_m1_8points(i,3)*hprime_xxT(3,j) + &
-                                      A1_mxm_m2_m1_8points(i,4)*hprime_xxT(4,j) + &
-                                      A1_mxm_m2_m1_8points(i,5)*hprime_xxT(5,j) + &
-                                      A1_mxm_m2_m1_8points(i,6)*hprime_xxT(6,j) + &
-                                      A1_mxm_m2_m1_8points(i,7)*hprime_xxT(7,j) + &
-                                      A1_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
-            C2_mxm_m2_m1_8points(i,j) = A2_mxm_m2_m1_8points(i,1)*hprime_xxT(1,j) + &
-                                      A2_mxm_m2_m1_8points(i,2)*hprime_xxT(2,j) + &
-                                      A2_mxm_m2_m1_8points(i,3)*hprime_xxT(3,j) + &
-                                      A2_mxm_m2_m1_8points(i,4)*hprime_xxT(4,j) + &
-                                      A2_mxm_m2_m1_8points(i,5)*hprime_xxT(5,j) + &
-                                      A2_mxm_m2_m1_8points(i,6)*hprime_xxT(6,j) + &
-                                      A2_mxm_m2_m1_8points(i,7)*hprime_xxT(7,j) + &
-                                      A2_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
-            C3_mxm_m2_m1_8points(i,j) = A3_mxm_m2_m1_8points(i,1)*hprime_xxT(1,j) + &
-                                      A3_mxm_m2_m1_8points(i,2)*hprime_xxT(2,j) + &
-                                      A3_mxm_m2_m1_8points(i,3)*hprime_xxT(3,j) + &
-                                      A3_mxm_m2_m1_8points(i,4)*hprime_xxT(4,j) + &
-                                      A3_mxm_m2_m1_8points(i,5)*hprime_xxT(5,j) + &
-                                      A3_mxm_m2_m1_8points(i,6)*hprime_xxT(6,j) + &
-                                      A3_mxm_m2_m1_8points(i,7)*hprime_xxT(7,j) + &
-                                      A3_mxm_m2_m1_8points(i,8)*hprime_xxT(8,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m2
-                 C1_mxm_m2_m1_8points_att(i,j) = C1_mxm_m2_m1_8points(i,j) + &
-                      A1_mxm_m2_m1_8points_att(i,1)*hprime_xxT(1,j) + &
-                      A1_mxm_m2_m1_8points_att(i,2)*hprime_xxT(2,j) + &
-                      A1_mxm_m2_m1_8points_att(i,3)*hprime_xxT(3,j) + &
-                      A1_mxm_m2_m1_8points_att(i,4)*hprime_xxT(4,j) + &
-                      A1_mxm_m2_m1_8points_att(i,5)*hprime_xxT(5,j) + &
-                      A1_mxm_m2_m1_8points_att(i,6)*hprime_xxT(6,j) + &
-                      A1_mxm_m2_m1_8points_att(i,7)*hprime_xxT(7,j) + &
-                      A1_mxm_m2_m1_8points_att(i,8)*hprime_xxT(8,j)
-
-                 C2_mxm_m2_m1_8points_att(i,j) = C2_mxm_m2_m1_8points(i,j) + &
-                      A2_mxm_m2_m1_8points_att(i,1)*hprime_xxT(1,j) + &
-                      A2_mxm_m2_m1_8points_att(i,2)*hprime_xxT(2,j) + &
-                      A2_mxm_m2_m1_8points_att(i,3)*hprime_xxT(3,j) + &
-                      A2_mxm_m2_m1_8points_att(i,4)*hprime_xxT(4,j) + &
-                      A2_mxm_m2_m1_8points_att(i,5)*hprime_xxT(5,j) + &
-                      A2_mxm_m2_m1_8points_att(i,6)*hprime_xxT(6,j) + &
-                      A2_mxm_m2_m1_8points_att(i,7)*hprime_xxT(7,j) + &
-                      A2_mxm_m2_m1_8points_att(i,8)*hprime_xxT(8,j)
-
-                 C3_mxm_m2_m1_8points_att(i,j) = C3_mxm_m2_m1_8points(i,j) + &
-                      A3_mxm_m2_m1_8points_att(i,1)*hprime_xxT(1,j) + &
-                      A3_mxm_m2_m1_8points_att(i,2)*hprime_xxT(2,j) + &
-                      A3_mxm_m2_m1_8points_att(i,3)*hprime_xxT(3,j) + &
-                      A3_mxm_m2_m1_8points_att(i,4)*hprime_xxT(4,j) + &
-                      A3_mxm_m2_m1_8points_att(i,5)*hprime_xxT(5,j) + &
-                      A3_mxm_m2_m1_8points_att(i,6)*hprime_xxT(6,j) + &
-                      A3_mxm_m2_m1_8points_att(i,7)*hprime_xxT(7,j) + &
-                      A3_mxm_m2_m1_8points_att(i,8)*hprime_xxT(8,j)
-              enddo
-           enddo
-        endif
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-              ! get derivatives of ux, uy and uz with respect to x, y and z
-              xixl = xix(i,j,k,ispec)
-              xiyl = xiy(i,j,k,ispec)
-              xizl = xiz(i,j,k,ispec)
-              etaxl = etax(i,j,k,ispec)
-              etayl = etay(i,j,k,ispec)
-              etazl = etaz(i,j,k,ispec)
-              gammaxl = gammax(i,j,k,ispec)
-              gammayl = gammay(i,j,k,ispec)
-              gammazl = gammaz(i,j,k,ispec)
-              jacobianl = jacobian(i,j,k,ispec)
-
-              duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
-              duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
-              duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
-              duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
-              duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
-              duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
-              duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
-              duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
-              duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
-              ! save strain on the Moho boundary
-              if (SAVE_MOHO_MESH ) then
-                if (is_moho_top(ispec)) then
-                  dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
-                  dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
-                  dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
-                  dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
-                  dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
-                  dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
-                  dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
-                  dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
-                  dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
-                else if (is_moho_bot(ispec)) then
-                  dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
-                  dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
-                  dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
-                  dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
-                  dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
-                  dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
-                  dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
-                  dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
-                  dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
-                endif
-              endif
-
-              ! precompute some sums to save CPU time
-              duxdxl_plus_duydyl = duxdxl + duydyl
-              duxdxl_plus_duzdzl = duxdxl + duzdzl
-              duydyl_plus_duzdzl = duydyl + duzdzl
-              duxdyl_plus_duydxl = duxdyl + duydxl
-              duzdxl_plus_duxdzl = duzdxl + duxdzl
-              duzdyl_plus_duydzl = duzdyl + duydzl
-
-              if ( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
-                 ! temporary variables used for fixing attenuation in a consistent way
-                 duxdxl_att = xixl*tempx1_att(i,j,k) + etaxl*tempx2_att(i,j,k) + gammaxl*tempx3_att(i,j,k)
-                 duxdyl_att = xiyl*tempx1_att(i,j,k) + etayl*tempx2_att(i,j,k) + gammayl*tempx3_att(i,j,k)
-                 duxdzl_att = xizl*tempx1_att(i,j,k) + etazl*tempx2_att(i,j,k) + gammazl*tempx3_att(i,j,k)
-
-                 duydxl_att = xixl*tempy1_att(i,j,k) + etaxl*tempy2_att(i,j,k) + gammaxl*tempy3_att(i,j,k)
-                 duydyl_att = xiyl*tempy1_att(i,j,k) + etayl*tempy2_att(i,j,k) + gammayl*tempy3_att(i,j,k)
-                 duydzl_att = xizl*tempy1_att(i,j,k) + etazl*tempy2_att(i,j,k) + gammazl*tempy3_att(i,j,k)
-
-                 duzdxl_att = xixl*tempz1_att(i,j,k) + etaxl*tempz2_att(i,j,k) + gammaxl*tempz3_att(i,j,k)
-                 duzdyl_att = xiyl*tempz1_att(i,j,k) + etayl*tempz2_att(i,j,k) + gammayl*tempz3_att(i,j,k)
-                 duzdzl_att = xizl*tempz1_att(i,j,k) + etazl*tempz2_att(i,j,k) + gammazl*tempz3_att(i,j,k)
-
-                 ! precompute some sums to save CPU time
-                 duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
-                 duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
-                 duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
-
-                 ! compute deviatoric strain
-                 templ = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
-                 if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                 epsilondev_xx_loc(i,j,k) = duxdxl_att - templ
-                 epsilondev_yy_loc(i,j,k) = duydyl_att - templ
-                 epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
-                 epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
-                 epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
-              else
-                 ! computes deviatoric strain attenuation and/or for kernel calculations
-                 if (COMPUTE_AND_STORE_STRAIN) then
-                    templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-                    if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                    epsilondev_xx_loc(i,j,k) = duxdxl - templ
-                    epsilondev_yy_loc(i,j,k) = duydyl - templ
-                    epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
-                    epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
-                    epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
-                 endif
-              endif
-
-              kappal = kappastore(i,j,k,ispec)
-              mul = mustore(i,j,k,ispec)
-
-              ! attenuation
-              if(ATTENUATION) then
-                ! use unrelaxed parameters if attenuation
-                mul  = mul * one_minus_sum_beta(i,j,k,ispec)
-              endif
-
-  ! full anisotropic case, stress calculations
-              if(ANISOTROPY) then
-                c11 = c11store(i,j,k,ispec)
-                c12 = c12store(i,j,k,ispec)
-                c13 = c13store(i,j,k,ispec)
-                c14 = c14store(i,j,k,ispec)
-                c15 = c15store(i,j,k,ispec)
-                c16 = c16store(i,j,k,ispec)
-                c22 = c22store(i,j,k,ispec)
-                c23 = c23store(i,j,k,ispec)
-                c24 = c24store(i,j,k,ispec)
-                c25 = c25store(i,j,k,ispec)
-                c26 = c26store(i,j,k,ispec)
-                c33 = c33store(i,j,k,ispec)
-                c34 = c34store(i,j,k,ispec)
-                c35 = c35store(i,j,k,ispec)
-                c36 = c36store(i,j,k,ispec)
-                c44 = c44store(i,j,k,ispec)
-                c45 = c45store(i,j,k,ispec)
-                c46 = c46store(i,j,k,ispec)
-                c55 = c55store(i,j,k,ispec)
-                c56 = c56store(i,j,k,ispec)
-                c66 = c66store(i,j,k,ispec)
-
-                sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
-                          c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
-                sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
-                          c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
-                sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
-                          c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
-                sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
-                          c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
-                sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
-                          c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
-                sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
-                          c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
-
-              else
-
-  ! isotropic case
-                lambdalplus2mul = kappal + FOUR_THIRDS * mul
-                lambdal = lambdalplus2mul - 2.*mul
-
-                ! compute stress sigma
-                sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
-                sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
-                sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
-
-                sigma_xy = mul*duxdyl_plus_duydxl
-                sigma_xz = mul*duzdxl_plus_duxdzl
-                sigma_yz = mul*duzdyl_plus_duydzl
-
-              endif ! ANISOTROPY
-
-              ! subtract memory variables if attenuation
-              if(ATTENUATION) then
-! way 1
-!                do i_sls = 1,N_SLS
-!                  R_xx_val = R_xx(i,j,k,ispec,i_sls)
-!                  R_yy_val = R_yy(i,j,k,ispec,i_sls)
-!                  sigma_xx = sigma_xx - R_xx_val
-!                  sigma_yy = sigma_yy - R_yy_val
-!                  sigma_zz = sigma_zz + R_xx_val + R_yy_val
-!                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-!                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-!                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-!                enddo
-
-! way 2
-! note: this should help compilers to pipeline the code and make better use of the cache;
-!          depending on compilers, it can further decrease the computation time by ~ 30%.
-!          by default, N_SLS = 3, therefore we take steps of 3
-              if(imodulo_N_SLS >= 1) then
-                do i_sls = 1,imodulo_N_SLS
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1
-                  sigma_yy = sigma_yy - R_yy_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-                enddo
-              endif
-
-              if(N_SLS >= imodulo_N_SLS+1) then
-                do i_sls = imodulo_N_SLS+1,N_SLS,3
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1
-                  sigma_yy = sigma_yy - R_yy_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-
-                  R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
-                  R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
-                  sigma_xx = sigma_xx - R_xx_val2
-                  sigma_yy = sigma_yy - R_yy_val2
-                  sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
-
-                  R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
-                  R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
-                  sigma_xx = sigma_xx - R_xx_val3
-                  sigma_yy = sigma_yy - R_yy_val3
-                  sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
-                enddo
-              endif
-
-
-              endif
-
-            ! define symmetric components of sigma
-            sigma_yx = sigma_xy
-            sigma_zx = sigma_xz
-            sigma_zy = sigma_yz
-
-            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
-            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
-            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
-            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
-            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
-            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
-            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
-            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
-            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
-            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
-            enddo
-          enddo
-        enddo
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_8points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
-        do j=1,m2
-          do i=1,m1
-            E1_m1_m2_8points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_8points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C1_m1_m2_8points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C1_m1_m2_8points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C1_m1_m2_8points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C1_m1_m2_8points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C1_m1_m2_8points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C1_m1_m2_8points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C1_m1_m2_8points(8,j)
-            E2_m1_m2_8points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_8points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C2_m1_m2_8points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C2_m1_m2_8points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C2_m1_m2_8points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C2_m1_m2_8points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C2_m1_m2_8points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C2_m1_m2_8points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C2_m1_m2_8points(8,j)
-            E3_m1_m2_8points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_8points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C3_m1_m2_8points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C3_m1_m2_8points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C3_m1_m2_8points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C3_m1_m2_8points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C3_m1_m2_8points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C3_m1_m2_8points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C3_m1_m2_8points(8,j)
-          enddo
-        enddo
-
-        !   call mxm_m1_m1_8points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
-        !         hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
-        do i=1,m1
-          do j=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempx2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempx2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempx2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempx2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempx2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempx2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempx2(i,8,k)*hprimewgll_xx(8,j)
-              newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempy2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempy2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempy2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempy2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempy2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempy2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempy2(i,8,k)*hprimewgll_xx(8,j)
-              newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempz2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempz2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempz2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempz2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempz2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempz2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempz2(i,8,k)*hprimewgll_xx(8,j)
-            enddo
-          enddo
-        enddo
-
-        ! call mxm_m2_m1_8points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
-        do j=1,m1
-          do i=1,m2
-            E1_mxm_m2_m1_8points(i,j) = C1_mxm_m2_m1_8points(i,1)*hprimewgll_xx(1,j) + &
-                                      C1_mxm_m2_m1_8points(i,2)*hprimewgll_xx(2,j) + &
-                                      C1_mxm_m2_m1_8points(i,3)*hprimewgll_xx(3,j) + &
-                                      C1_mxm_m2_m1_8points(i,4)*hprimewgll_xx(4,j) + &
-                                      C1_mxm_m2_m1_8points(i,5)*hprimewgll_xx(5,j) + &
-                                      C1_mxm_m2_m1_8points(i,6)*hprimewgll_xx(6,j) + &
-                                      C1_mxm_m2_m1_8points(i,7)*hprimewgll_xx(7,j) + &
-                                      C1_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
-            E2_mxm_m2_m1_8points(i,j) = C2_mxm_m2_m1_8points(i,1)*hprimewgll_xx(1,j) + &
-                                      C2_mxm_m2_m1_8points(i,2)*hprimewgll_xx(2,j) + &
-                                      C2_mxm_m2_m1_8points(i,3)*hprimewgll_xx(3,j) + &
-                                      C2_mxm_m2_m1_8points(i,4)*hprimewgll_xx(4,j) + &
-                                      C2_mxm_m2_m1_8points(i,5)*hprimewgll_xx(5,j) + &
-                                      C2_mxm_m2_m1_8points(i,6)*hprimewgll_xx(6,j) + &
-                                      C2_mxm_m2_m1_8points(i,7)*hprimewgll_xx(7,j) + &
-                                      C2_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
-            E3_mxm_m2_m1_8points(i,j) = C3_mxm_m2_m1_8points(i,1)*hprimewgll_xx(1,j) + &
-                                      C3_mxm_m2_m1_8points(i,2)*hprimewgll_xx(2,j) + &
-                                      C3_mxm_m2_m1_8points(i,3)*hprimewgll_xx(3,j) + &
-                                      C3_mxm_m2_m1_8points(i,4)*hprimewgll_xx(4,j) + &
-                                      C3_mxm_m2_m1_8points(i,5)*hprimewgll_xx(5,j) + &
-                                      C3_mxm_m2_m1_8points(i,6)*hprimewgll_xx(6,j) + &
-                                      C3_mxm_m2_m1_8points(i,7)*hprimewgll_xx(7,j) + &
-                                      C3_mxm_m2_m1_8points(i,8)*hprimewgll_xx(8,j)
-          enddo
-        enddo
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-
-              fac1 = wgllwgll_yz(j,k)
-              fac2 = wgllwgll_xz(i,k)
-              fac3 = wgllwgll_xy(i,j)
-
-              ! sum contributions from each element to the global mesh using indirect addressing
-              iglob = ibool(i,j,k,ispec)
-              accel(1,iglob) = accel(1,iglob) - fac1*newtempx1(i,j,k) - &
-                                fac2*newtempx2(i,j,k) - fac3*newtempx3(i,j,k)
-              accel(2,iglob) = accel(2,iglob) - fac1*newtempy1(i,j,k) - &
-                                fac2*newtempy2(i,j,k) - fac3*newtempy3(i,j,k)
-              accel(3,iglob) = accel(3,iglob) - fac1*newtempz1(i,j,k) - &
-                                fac2*newtempz2(i,j,k) - fac3*newtempz3(i,j,k)
-
-              !  update memory variables based upon the Runge-Kutta scheme
-              if(ATTENUATION) then
-
-                 ! use Runge-Kutta scheme to march in time
-                 do i_sls = 1,N_SLS
-
-                    factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
-
-                    alphaval_loc = alphaval(i_sls)
-                    betaval_loc = betaval(i_sls)
-                    gammaval_loc = gammaval(i_sls)
-
-                    ! term in xx
-                    Sn   = factor_loc * epsilondev_xx(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xx_loc(i,j,k)
-                    R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yy
-                    Sn   = factor_loc * epsilondev_yy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yy_loc(i,j,k)
-                    R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in zz not computed since zero trace
-                    ! term in xy
-                    Sn   = factor_loc * epsilondev_xy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xy_loc(i,j,k)
-                    R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in xz
-                    Sn   = factor_loc * epsilondev_xz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xz_loc(i,j,k)
-                    R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yz
-                    Sn   = factor_loc * epsilondev_yz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yz_loc(i,j,k)
-                    R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-
-                 enddo   ! end of loop on memory variables
-
-              endif  !  end attenuation
-
-            enddo
-          enddo
-        enddo
-
-        ! save deviatoric strain for Runge-Kutta scheme
-        if ( COMPUTE_AND_STORE_STRAIN ) then
-          epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
-          epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
-          epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
-          epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
-          epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
-        endif
-
-  enddo  ! spectral element loop
-
-end subroutine compute_forces_elastic_Dev_8p
-
-!
-!=====================================================================
-!
-
-subroutine compute_forces_elastic_Dev_9p( iphase ,NSPEC_AB,NGLOB_AB, &
-                                    displ,veloc,accel, &
-                                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                                    hprime_xx,hprime_xxT, &
-                                    hprimewgll_xx,hprimewgll_xxT, &
-                                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                                    kappastore,mustore,jacobian,ibool, &
-                                    ATTENUATION,deltat, &
-                                    one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
-                                    NSPEC_ATTENUATION_AB, &
-                                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                                    ANISOTROPY,NSPEC_ANISO, &
-                                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                                    c55store,c56store,c66store, &
-                                    SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
-                                    is_moho_top,is_moho_bot, &
-                                    dsdx_top,dsdx_bot, &
-                                    ispec2D_moho_top,ispec2D_moho_bot, &
-                                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                                    phase_ispec_inner_elastic)
-
-
-! computes elastic tensor term
-
-  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
-                      N_SLS,SAVE_MOHO_MESH, &
-                      ONE_THIRD,FOUR_THIRDS,m1,m2
-  implicit none
-
-  integer :: NSPEC_AB,NGLOB_AB
-
-! displacement, velocity and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,veloc,accel
-
-! time step
-  real(kind=CUSTOM_REAL) :: deltat
-
-! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        kappastore,mustore,jacobian
-
-! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,9) :: hprime_xx,hprimewgll_xxT
-  real(kind=CUSTOM_REAL), dimension(9,NGLLX) :: hprime_xxT,hprimewgll_xx
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! memory variables and standard linear solids for attenuation
-  logical :: ATTENUATION
-  logical :: COMPUTE_AND_STORE_STRAIN
-  integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
-  integer :: NSPEC_ATTENUATION_AB
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
-  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
-      R_xx,R_yy,R_xy,R_xz,R_yz
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
-       epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
-  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
-
-! anisotropy
-  logical :: ANISOTROPY
-  integer :: NSPEC_ANISO
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
-            c11store,c12store,c13store,c14store,c15store,c16store, &
-            c22store,c23store,c24store,c25store,c26store,c33store, &
-            c34store,c35store,c36store,c44store,c45store,c46store, &
-            c55store,c56store,c66store
-
-  integer :: iphase
-  integer :: num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic
-  integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
-
-! adjoint simulations
-  integer :: SIMULATION_TYPE
-  integer :: NSPEC_BOUN,NSPEC2D_MOHO
-
-  ! moho kernel
-  real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
-    dsdx_top,dsdx_bot
-  logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
-  integer :: ispec2D_moho_top, ispec2D_moho_bot
-
-! local parameters
-  real(kind=CUSTOM_REAL), dimension(9,9,9) :: dummyx_loc,dummyy_loc,dummyz_loc, &
-    newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(9,9,9) :: &
-    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-
-  ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(9,81) :: B1_m1_m2_9points,B2_m1_m2_9points,B3_m1_m2_9points
-  real(kind=CUSTOM_REAL), dimension(9,81) :: C1_m1_m2_9points,C2_m1_m2_9points,C3_m1_m2_9points
-  real(kind=CUSTOM_REAL), dimension(9,81) :: E1_m1_m2_9points,E2_m1_m2_9points,E3_m1_m2_9points
-
-  equivalence(dummyx_loc,B1_m1_m2_9points)
-  equivalence(dummyy_loc,B2_m1_m2_9points)
-  equivalence(dummyz_loc,B3_m1_m2_9points)
-  equivalence(tempx1,C1_m1_m2_9points)
-  equivalence(tempy1,C2_m1_m2_9points)
-  equivalence(tempz1,C3_m1_m2_9points)
-  equivalence(newtempx1,E1_m1_m2_9points)
-  equivalence(newtempy1,E2_m1_m2_9points)
-  equivalence(newtempz1,E3_m1_m2_9points)
-
-  real(kind=CUSTOM_REAL), dimension(9,9,9) :: &
-    tempx1_att,tempx2_att,tempx3_att,tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
-  real(kind=CUSTOM_REAL), dimension(9,9,9) :: dummyx_loc_att,dummyy_loc_att,dummyz_loc_att
-  real(kind=CUSTOM_REAL), dimension(9,81) :: B1_m1_m2_9points_att,B2_m1_m2_9points_att,B3_m1_m2_9points_att
-  real(kind=CUSTOM_REAL), dimension(9,81) :: C1_m1_m2_9points_att,C2_m1_m2_9points_att,C3_m1_m2_9points_att
-
-  equivalence(dummyx_loc_att,B1_m1_m2_9points_att)
-  equivalence(dummyy_loc_att,B2_m1_m2_9points_att)
-  equivalence(dummyz_loc_att,B3_m1_m2_9points_att)
-  equivalence(tempx1_att,C1_m1_m2_9points_att)
-  equivalence(tempy1_att,C2_m1_m2_9points_att)
-  equivalence(tempz1_att,C3_m1_m2_9points_att)
-
-  real(kind=CUSTOM_REAL), dimension(81,9) :: &
-    A1_mxm_m2_m1_9points,A2_mxm_m2_m1_9points,A3_mxm_m2_m1_9points
-  real(kind=CUSTOM_REAL), dimension(81,9) :: &
-    C1_mxm_m2_m1_9points,C2_mxm_m2_m1_9points,C3_mxm_m2_m1_9points
-  real(kind=CUSTOM_REAL), dimension(81,9) :: &
-    E1_mxm_m2_m1_9points,E2_mxm_m2_m1_9points,E3_mxm_m2_m1_9points
-
-  equivalence(dummyx_loc,A1_mxm_m2_m1_9points)
-  equivalence(dummyy_loc,A2_mxm_m2_m1_9points)
-  equivalence(dummyz_loc,A3_mxm_m2_m1_9points)
-  equivalence(tempx3,C1_mxm_m2_m1_9points)
-  equivalence(tempy3,C2_mxm_m2_m1_9points)
-  equivalence(tempz3,C3_mxm_m2_m1_9points)
-  equivalence(newtempx3,E1_mxm_m2_m1_9points)
-  equivalence(newtempy3,E2_mxm_m2_m1_9points)
-  equivalence(newtempz3,E3_mxm_m2_m1_9points)
-
-  real(kind=CUSTOM_REAL), dimension(81,9) :: &
-    A1_mxm_m2_m1_9points_att,A2_mxm_m2_m1_9points_att,A3_mxm_m2_m1_9points_att
-  real(kind=CUSTOM_REAL), dimension(81,9) :: &
-    C1_mxm_m2_m1_9points_att,C2_mxm_m2_m1_9points_att,C3_mxm_m2_m1_9points_att
-
-  equivalence(dummyx_loc_att,A1_mxm_m2_m1_9points_att)
-  equivalence(dummyy_loc_att,A2_mxm_m2_m1_9points_att)
-  equivalence(dummyz_loc_att,A3_mxm_m2_m1_9points_att)
-  equivalence(tempx3_att,C1_mxm_m2_m1_9points_att)
-  equivalence(tempy3_att,C2_mxm_m2_m1_9points_att)
-  equivalence(tempz3_att,C3_mxm_m2_m1_9points_att)
-
-  ! local attenuation parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_xx_loc, &
-       epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
-  real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
-  real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
-  real(kind=CUSTOM_REAL) Sn,Snp1
-  real(kind=CUSTOM_REAL) templ
-
-  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
-
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-
-  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
-
-  real(kind=CUSTOM_REAL) fac1,fac2,fac3
-
-  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
-  real(kind=CUSTOM_REAL) kappal
-
-  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
-  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
-
-  ! local anisotropy parameters
-  real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
-                        c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
-
-  integer i_SLS,imodulo_N_SLS
-  integer ispec,iglob,ispec_p,num_elements
-  integer i,j,k
-
-  imodulo_N_SLS = mod(N_SLS,3)
-
-  ! choses inner/outer elements
-  if( iphase == 1 ) then
-    num_elements = nspec_outer_elastic
-  else
-    num_elements = nspec_inner_elastic
-  endif
-
-  do ispec_p = 1,num_elements
-
-        ! returns element id from stored element list
-        ispec = phase_ispec_inner_elastic(ispec_p,iphase)
-
-        ! adjoint simulations: moho kernel
-        if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
-          if (is_moho_top(ispec)) then
-            ispec2D_moho_top = ispec2D_moho_top + 1
-          else if (is_moho_bot(ispec)) then
-            ispec2D_moho_bot = ispec2D_moho_bot + 1
-          endif
-        endif ! adjoint
-
-        ! stores displacment values in local array
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc(i,j,k) = displ(1,iglob)
-                dummyy_loc(i,j,k) = displ(2,iglob)
-                dummyz_loc(i,j,k) = displ(3,iglob)
-            enddo
-          enddo
-        enddo
-
-        ! use first order Taylor expansion of displacement for local storage of stresses 
-        ! at this current time step, to fix attenuation in a consistent way
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           do k=1,NGLLZ
-              do j=1,NGLLY
-                 do i=1,NGLLX
-                    iglob = ibool(i,j,k,ispec)
-                    dummyx_loc_att(i,j,k) = deltat*veloc(1,iglob)
-                    dummyy_loc_att(i,j,k) = deltat*veloc(2,iglob)
-                    dummyz_loc_att(i,j,k) = deltat*veloc(3,iglob)
-                 enddo
-              enddo
-           enddo
-        endif
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_9points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
-        do j=1,m2
-          do i=1,m1
-            C1_m1_m2_9points(i,j) = hprime_xx(i,1)*B1_m1_m2_9points(1,j) + &
-                                  hprime_xx(i,2)*B1_m1_m2_9points(2,j) + &
-                                  hprime_xx(i,3)*B1_m1_m2_9points(3,j) + &
-                                  hprime_xx(i,4)*B1_m1_m2_9points(4,j) + &
-                                  hprime_xx(i,5)*B1_m1_m2_9points(5,j) + &
-                                  hprime_xx(i,6)*B1_m1_m2_9points(6,j) + &
-                                  hprime_xx(i,7)*B1_m1_m2_9points(7,j) + &
-                                  hprime_xx(i,8)*B1_m1_m2_9points(8,j) + &
-                                  hprime_xx(i,9)*B1_m1_m2_9points(9,j)
-            C2_m1_m2_9points(i,j) = hprime_xx(i,1)*B2_m1_m2_9points(1,j) + &
-                                  hprime_xx(i,2)*B2_m1_m2_9points(2,j) + &
-                                  hprime_xx(i,3)*B2_m1_m2_9points(3,j) + &
-                                  hprime_xx(i,4)*B2_m1_m2_9points(4,j) + &
-                                  hprime_xx(i,5)*B2_m1_m2_9points(5,j) + &
-                                  hprime_xx(i,6)*B2_m1_m2_9points(6,j) + &
-                                  hprime_xx(i,7)*B2_m1_m2_9points(7,j) + &
-                                  hprime_xx(i,8)*B2_m1_m2_9points(8,j) + &
-                                  hprime_xx(i,9)*B2_m1_m2_9points(9,j)
-            C3_m1_m2_9points(i,j) = hprime_xx(i,1)*B3_m1_m2_9points(1,j) + &
-                                  hprime_xx(i,2)*B3_m1_m2_9points(2,j) + &
-                                  hprime_xx(i,3)*B3_m1_m2_9points(3,j) + &
-                                  hprime_xx(i,4)*B3_m1_m2_9points(4,j) + &
-                                  hprime_xx(i,5)*B3_m1_m2_9points(5,j) + &
-                                  hprime_xx(i,6)*B3_m1_m2_9points(6,j) + &
-                                  hprime_xx(i,7)*B3_m1_m2_9points(7,j) + &
-                                  hprime_xx(i,8)*B3_m1_m2_9points(8,j) + &
-                                  hprime_xx(i,9)*B3_m1_m2_9points(9,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m2
-              do i=1,m1
-                 C1_m1_m2_9points_att(i,j) = C1_m1_m2_9points(i,j) + & 
-                      hprime_xx(i,1)*B1_m1_m2_9points_att(1,j) + &
-                      hprime_xx(i,2)*B1_m1_m2_9points_att(2,j) + &
-                      hprime_xx(i,3)*B1_m1_m2_9points_att(3,j) + &
-                      hprime_xx(i,4)*B1_m1_m2_9points_att(4,j) + &
-                      hprime_xx(i,5)*B1_m1_m2_9points_att(5,j) + &
-                      hprime_xx(i,6)*B1_m1_m2_9points_att(6,j) + &
-                      hprime_xx(i,7)*B1_m1_m2_9points_att(7,j) + &
-                      hprime_xx(i,8)*B1_m1_m2_9points_att(8,j) + &
-                      hprime_xx(i,9)*B1_m1_m2_9points_att(9,j)
-
-                 C2_m1_m2_9points_att(i,j) = C2_m1_m2_9points(i,j) + &
-                      hprime_xx(i,1)*B2_m1_m2_9points_att(1,j) + &
-                      hprime_xx(i,2)*B2_m1_m2_9points_att(2,j) + &
-                      hprime_xx(i,3)*B2_m1_m2_9points_att(3,j) + &
-                      hprime_xx(i,4)*B2_m1_m2_9points_att(4,j) + &
-                      hprime_xx(i,5)*B2_m1_m2_9points_att(5,j) + &
-                      hprime_xx(i,6)*B2_m1_m2_9points_att(6,j) + &
-                      hprime_xx(i,7)*B2_m1_m2_9points_att(7,j) + &
-                      hprime_xx(i,8)*B2_m1_m2_9points_att(8,j) + &
-                      hprime_xx(i,9)*B2_m1_m2_9points_att(9,j)
-
-                 C3_m1_m2_9points_att(i,j) = C3_m1_m2_9points(i,j) + &
-                      hprime_xx(i,1)*B3_m1_m2_9points_att(1,j) + &
-                      hprime_xx(i,2)*B3_m1_m2_9points_att(2,j) + &
-                      hprime_xx(i,3)*B3_m1_m2_9points_att(3,j) + &
-                      hprime_xx(i,4)*B3_m1_m2_9points_att(4,j) + &
-                      hprime_xx(i,5)*B3_m1_m2_9points_att(5,j) + &
-                      hprime_xx(i,6)*B3_m1_m2_9points_att(6,j) + &
-                      hprime_xx(i,7)*B3_m1_m2_9points_att(7,j) + &
-                      hprime_xx(i,8)*B3_m1_m2_9points_att(8,j) + &
-                      hprime_xx(i,9)*B3_m1_m2_9points_att(9,j)
-              enddo
-           enddo
-        endif
-
-        !   call mxm_m1_m1_9points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
-        !          hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
-        do j=1,m1
-          do i=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyx_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyx_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyx_loc(i,8,k)*hprime_xxT(8,j) + &
-                            dummyx_loc(i,9,k)*hprime_xxT(9,j)
-              tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyy_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyy_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyy_loc(i,8,k)*hprime_xxT(8,j) + &
-                            dummyy_loc(i,9,k)*hprime_xxT(9,j)
-              tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyz_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyz_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyz_loc(i,8,k)*hprime_xxT(8,j) + &
-                            dummyz_loc(i,9,k)*hprime_xxT(9,j)
-            enddo
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m1
-                 ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-                 do k = 1,NGLLX
-                    tempx2_att(i,j,k) = tempx2(i,j,k) + &
-                         dummyx_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyx_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyx_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyx_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyx_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyx_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyx_loc_att(i,7,k)*hprime_xxT(7,j) + &
-                         dummyx_loc_att(i,8,k)*hprime_xxT(8,j) + &
-                         dummyx_loc_att(i,9,k)*hprime_xxT(9,j)
-
-                    tempy2_att(i,j,k) = tempy2(i,j,k) + &
-                         dummyy_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyy_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyy_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyy_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyy_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyy_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyy_loc_att(i,7,k)*hprime_xxT(7,j) + &
-                         dummyy_loc_att(i,8,k)*hprime_xxT(8,j) + &
-                         dummyy_loc_att(i,9,k)*hprime_xxT(9,j)
-
-                    tempz2_att(i,j,k) = tempz2(i,j,k) + &
-                         dummyz_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyz_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyz_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyz_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyz_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyz_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyz_loc_att(i,7,k)*hprime_xxT(7,j) + &
-                         dummyz_loc_att(i,8,k)*hprime_xxT(8,j) + &
-                         dummyz_loc_att(i,9,k)*hprime_xxT(9,j)
-                 enddo
-              enddo
-           enddo
-        endif
-
-        ! call mxm_m2_m1_9points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
-        do j=1,m1
-          do i=1,m2
-            C1_mxm_m2_m1_9points(i,j) = A1_mxm_m2_m1_9points(i,1)*hprime_xxT(1,j) + &
-                                      A1_mxm_m2_m1_9points(i,2)*hprime_xxT(2,j) + &
-                                      A1_mxm_m2_m1_9points(i,3)*hprime_xxT(3,j) + &
-                                      A1_mxm_m2_m1_9points(i,4)*hprime_xxT(4,j) + &
-                                      A1_mxm_m2_m1_9points(i,5)*hprime_xxT(5,j) + &
-                                      A1_mxm_m2_m1_9points(i,6)*hprime_xxT(6,j) + &
-                                      A1_mxm_m2_m1_9points(i,7)*hprime_xxT(7,j) + &
-                                      A1_mxm_m2_m1_9points(i,8)*hprime_xxT(8,j) + &
-                                      A1_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
-            C2_mxm_m2_m1_9points(i,j) = A2_mxm_m2_m1_9points(i,1)*hprime_xxT(1,j) + &
-                                      A2_mxm_m2_m1_9points(i,2)*hprime_xxT(2,j) + &
-                                      A2_mxm_m2_m1_9points(i,3)*hprime_xxT(3,j) + &
-                                      A2_mxm_m2_m1_9points(i,4)*hprime_xxT(4,j) + &
-                                      A2_mxm_m2_m1_9points(i,5)*hprime_xxT(5,j) + &
-                                      A2_mxm_m2_m1_9points(i,6)*hprime_xxT(6,j) + &
-                                      A2_mxm_m2_m1_9points(i,7)*hprime_xxT(7,j) + &
-                                      A2_mxm_m2_m1_9points(i,8)*hprime_xxT(8,j) + &
-                                      A2_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
-            C3_mxm_m2_m1_9points(i,j) = A3_mxm_m2_m1_9points(i,1)*hprime_xxT(1,j) + &
-                                      A3_mxm_m2_m1_9points(i,2)*hprime_xxT(2,j) + &
-                                      A3_mxm_m2_m1_9points(i,3)*hprime_xxT(3,j) + &
-                                      A3_mxm_m2_m1_9points(i,4)*hprime_xxT(4,j) + &
-                                      A3_mxm_m2_m1_9points(i,5)*hprime_xxT(5,j) + &
-                                      A3_mxm_m2_m1_9points(i,6)*hprime_xxT(6,j) + &
-                                      A3_mxm_m2_m1_9points(i,7)*hprime_xxT(7,j) + &
-                                      A3_mxm_m2_m1_9points(i,8)*hprime_xxT(8,j) + &
-                                      A3_mxm_m2_m1_9points(i,9)*hprime_xxT(9,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m2
-                 C1_mxm_m2_m1_9points_att(i,j) = C1_mxm_m2_m1_9points(i,j) + &
-                      A1_mxm_m2_m1_9points_att(i,1)*hprime_xxT(1,j) + &
-                      A1_mxm_m2_m1_9points_att(i,2)*hprime_xxT(2,j) + &
-                      A1_mxm_m2_m1_9points_att(i,3)*hprime_xxT(3,j) + &
-                      A1_mxm_m2_m1_9points_att(i,4)*hprime_xxT(4,j) + &
-                      A1_mxm_m2_m1_9points_att(i,5)*hprime_xxT(5,j) + &
-                      A1_mxm_m2_m1_9points_att(i,6)*hprime_xxT(6,j) + &
-                      A1_mxm_m2_m1_9points_att(i,7)*hprime_xxT(7,j) + &
-                      A1_mxm_m2_m1_9points_att(i,8)*hprime_xxT(8,j) + &
-                      A1_mxm_m2_m1_9points_att(i,9)*hprime_xxT(9,j)
-
-                 C2_mxm_m2_m1_9points_att(i,j) = C2_mxm_m2_m1_9points(i,j) + &
-                      A2_mxm_m2_m1_9points_att(i,1)*hprime_xxT(1,j) + &
-                      A2_mxm_m2_m1_9points_att(i,2)*hprime_xxT(2,j) + &
-                      A2_mxm_m2_m1_9points_att(i,3)*hprime_xxT(3,j) + &
-                      A2_mxm_m2_m1_9points_att(i,4)*hprime_xxT(4,j) + &
-                      A2_mxm_m2_m1_9points_att(i,5)*hprime_xxT(5,j) + &
-                      A2_mxm_m2_m1_9points_att(i,6)*hprime_xxT(6,j) + &
-                      A2_mxm_m2_m1_9points_att(i,7)*hprime_xxT(7,j) + &
-                      A2_mxm_m2_m1_9points_att(i,8)*hprime_xxT(8,j) + &
-                      A2_mxm_m2_m1_9points_att(i,9)*hprime_xxT(9,j)
-
-                 C3_mxm_m2_m1_9points_att(i,j) = C3_mxm_m2_m1_9points(i,j) + &
-                      A3_mxm_m2_m1_9points_att(i,1)*hprime_xxT(1,j) + &
-                      A3_mxm_m2_m1_9points_att(i,2)*hprime_xxT(2,j) + &
-                      A3_mxm_m2_m1_9points_att(i,3)*hprime_xxT(3,j) + &
-                      A3_mxm_m2_m1_9points_att(i,4)*hprime_xxT(4,j) + &
-                      A3_mxm_m2_m1_9points_att(i,5)*hprime_xxT(5,j) + &
-                      A3_mxm_m2_m1_9points_att(i,6)*hprime_xxT(6,j) + &
-                      A3_mxm_m2_m1_9points_att(i,7)*hprime_xxT(7,j) + &
-                      A3_mxm_m2_m1_9points_att(i,8)*hprime_xxT(8,j) + &
-                      A3_mxm_m2_m1_9points_att(i,9)*hprime_xxT(9,j)
-              enddo
-           enddo
-        endif
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-              ! get derivatives of ux, uy and uz with respect to x, y and z
-              xixl = xix(i,j,k,ispec)
-              xiyl = xiy(i,j,k,ispec)
-              xizl = xiz(i,j,k,ispec)
-              etaxl = etax(i,j,k,ispec)
-              etayl = etay(i,j,k,ispec)
-              etazl = etaz(i,j,k,ispec)
-              gammaxl = gammax(i,j,k,ispec)
-              gammayl = gammay(i,j,k,ispec)
-              gammazl = gammaz(i,j,k,ispec)
-              jacobianl = jacobian(i,j,k,ispec)
-
-              duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
-              duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
-              duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
-              duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
-              duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
-              duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
-              duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
-              duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
-              duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
-              ! save strain on the Moho boundary
-              if (SAVE_MOHO_MESH ) then
-                if (is_moho_top(ispec)) then
-                  dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
-                  dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
-                  dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
-                  dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
-                  dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
-                  dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
-                  dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
-                  dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
-                  dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
-                else if (is_moho_bot(ispec)) then
-                  dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
-                  dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
-                  dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
-                  dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
-                  dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
-                  dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
-                  dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
-                  dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
-                  dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
-                endif
-              endif
-
-              ! precompute some sums to save CPU time
-              duxdxl_plus_duydyl = duxdxl + duydyl
-              duxdxl_plus_duzdzl = duxdxl + duzdzl
-              duydyl_plus_duzdzl = duydyl + duzdzl
-              duxdyl_plus_duydxl = duxdyl + duydxl
-              duzdxl_plus_duxdzl = duzdxl + duxdzl
-              duzdyl_plus_duydzl = duzdyl + duydzl
-
-              if ( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
-                 ! temporary variables used for fixing attenuation in a consistent way
-                 duxdxl_att = xixl*tempx1_att(i,j,k) + etaxl*tempx2_att(i,j,k) + gammaxl*tempx3_att(i,j,k)
-                 duxdyl_att = xiyl*tempx1_att(i,j,k) + etayl*tempx2_att(i,j,k) + gammayl*tempx3_att(i,j,k)
-                 duxdzl_att = xizl*tempx1_att(i,j,k) + etazl*tempx2_att(i,j,k) + gammazl*tempx3_att(i,j,k)
-
-                 duydxl_att = xixl*tempy1_att(i,j,k) + etaxl*tempy2_att(i,j,k) + gammaxl*tempy3_att(i,j,k)
-                 duydyl_att = xiyl*tempy1_att(i,j,k) + etayl*tempy2_att(i,j,k) + gammayl*tempy3_att(i,j,k)
-                 duydzl_att = xizl*tempy1_att(i,j,k) + etazl*tempy2_att(i,j,k) + gammazl*tempy3_att(i,j,k)
-
-                 duzdxl_att = xixl*tempz1_att(i,j,k) + etaxl*tempz2_att(i,j,k) + gammaxl*tempz3_att(i,j,k)
-                 duzdyl_att = xiyl*tempz1_att(i,j,k) + etayl*tempz2_att(i,j,k) + gammayl*tempz3_att(i,j,k)
-                 duzdzl_att = xizl*tempz1_att(i,j,k) + etazl*tempz2_att(i,j,k) + gammazl*tempz3_att(i,j,k)
-
-                 ! precompute some sums to save CPU time
-                 duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
-                 duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
-                 duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
-
-                 ! compute deviatoric strain
-                 templ = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
-                 if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                 epsilondev_xx_loc(i,j,k) = duxdxl_att - templ
-                 epsilondev_yy_loc(i,j,k) = duydyl_att - templ
-                 epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
-                 epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
-                 epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
-              else
-                 ! computes deviatoric strain attenuation and/or for kernel calculations
-                 if (COMPUTE_AND_STORE_STRAIN) then
-                    templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-                    if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                    epsilondev_xx_loc(i,j,k) = duxdxl - templ
-                    epsilondev_yy_loc(i,j,k) = duydyl - templ
-                    epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
-                    epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
-                    epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
-                 endif
-              endif
-
-              kappal = kappastore(i,j,k,ispec)
-              mul = mustore(i,j,k,ispec)
-
-              ! attenuation
-              if(ATTENUATION) then
-                ! use unrelaxed parameters if attenuation
-                mul  = mul * one_minus_sum_beta(i,j,k,ispec)
-              endif
-
-  ! full anisotropic case, stress calculations
-              if(ANISOTROPY) then
-                c11 = c11store(i,j,k,ispec)
-                c12 = c12store(i,j,k,ispec)
-                c13 = c13store(i,j,k,ispec)
-                c14 = c14store(i,j,k,ispec)
-                c15 = c15store(i,j,k,ispec)
-                c16 = c16store(i,j,k,ispec)
-                c22 = c22store(i,j,k,ispec)
-                c23 = c23store(i,j,k,ispec)
-                c24 = c24store(i,j,k,ispec)
-                c25 = c25store(i,j,k,ispec)
-                c26 = c26store(i,j,k,ispec)
-                c33 = c33store(i,j,k,ispec)
-                c34 = c34store(i,j,k,ispec)
-                c35 = c35store(i,j,k,ispec)
-                c36 = c36store(i,j,k,ispec)
-                c44 = c44store(i,j,k,ispec)
-                c45 = c45store(i,j,k,ispec)
-                c46 = c46store(i,j,k,ispec)
-                c55 = c55store(i,j,k,ispec)
-                c56 = c56store(i,j,k,ispec)
-                c66 = c66store(i,j,k,ispec)
-
-                sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
-                          c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
-                sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
-                          c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
-                sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
-                          c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
-                sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
-                          c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
-                sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
-                          c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
-                sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
-                          c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
-
-              else
-
-  ! isotropic case
-                lambdalplus2mul = kappal + FOUR_THIRDS * mul
-                lambdal = lambdalplus2mul - 2.*mul
-
-                ! compute stress sigma
-                sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
-                sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
-                sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
-
-                sigma_xy = mul*duxdyl_plus_duydxl
-                sigma_xz = mul*duzdxl_plus_duxdzl
-                sigma_yz = mul*duzdyl_plus_duydzl
-
-              endif ! ANISOTROPY
-
-              ! subtract memory variables if attenuation
-              if(ATTENUATION) then
-! way 1
-!                do i_sls = 1,N_SLS
-!                  R_xx_val = R_xx(i,j,k,ispec,i_sls)
-!                  R_yy_val = R_yy(i,j,k,ispec,i_sls)
-!                  sigma_xx = sigma_xx - R_xx_val
-!                  sigma_yy = sigma_yy - R_yy_val
-!                  sigma_zz = sigma_zz + R_xx_val + R_yy_val
-!                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-!                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-!                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-!                enddo
-
-! way 2
-! note: this should help compilers to pipeline the code and make better use of the cache;
-!          depending on compilers, it can further decrease the computation time by ~ 30%.
-!          by default, N_SLS = 3, therefore we take steps of 3
-              if(imodulo_N_SLS >= 1) then
-                do i_sls = 1,imodulo_N_SLS
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1
-                  sigma_yy = sigma_yy - R_yy_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-                enddo
-              endif
-
-              if(N_SLS >= imodulo_N_SLS+1) then
-                do i_sls = imodulo_N_SLS+1,N_SLS,3
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1
-                  sigma_yy = sigma_yy - R_yy_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-
-                  R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
-                  R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
-                  sigma_xx = sigma_xx - R_xx_val2
-                  sigma_yy = sigma_yy - R_yy_val2
-                  sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
-
-                  R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
-                  R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
-                  sigma_xx = sigma_xx - R_xx_val3
-                  sigma_yy = sigma_yy - R_yy_val3
-                  sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
-                enddo
-              endif
-
-
-              endif
-
-            ! define symmetric components of sigma
-            sigma_yx = sigma_xy
-            sigma_zx = sigma_xz
-            sigma_zy = sigma_yz
-
-            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
-            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
-            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
-            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
-            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
-            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
-            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
-            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
-            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
-            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
-            enddo
-          enddo
-        enddo
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_9points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
-        do j=1,m2
-          do i=1,m1
-            E1_m1_m2_9points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_9points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C1_m1_m2_9points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C1_m1_m2_9points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C1_m1_m2_9points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C1_m1_m2_9points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C1_m1_m2_9points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C1_m1_m2_9points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C1_m1_m2_9points(8,j) + &
-                                  hprimewgll_xxT(i,9)*C1_m1_m2_9points(9,j)
-            E2_m1_m2_9points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_9points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C2_m1_m2_9points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C2_m1_m2_9points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C2_m1_m2_9points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C2_m1_m2_9points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C2_m1_m2_9points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C2_m1_m2_9points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C2_m1_m2_9points(8,j) + &
-                                  hprimewgll_xxT(i,9)*C2_m1_m2_9points(9,j)
-            E3_m1_m2_9points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_9points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C3_m1_m2_9points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C3_m1_m2_9points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C3_m1_m2_9points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C3_m1_m2_9points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C3_m1_m2_9points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C3_m1_m2_9points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C3_m1_m2_9points(8,j) + &
-                                  hprimewgll_xxT(i,9)*C3_m1_m2_9points(9,j)
-          enddo
-        enddo
-
-        !   call mxm_m1_m1_9points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
-        !         hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
-        do i=1,m1
-          do j=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempx2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempx2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempx2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempx2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempx2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempx2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempx2(i,8,k)*hprimewgll_xx(8,j) + &
-                               tempx2(i,9,k)*hprimewgll_xx(9,j)
-              newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempy2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempy2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempy2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempy2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempy2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempy2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempy2(i,8,k)*hprimewgll_xx(8,j) + &
-                               tempy2(i,9,k)*hprimewgll_xx(9,j)
-              newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempz2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempz2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempz2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempz2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempz2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempz2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempz2(i,8,k)*hprimewgll_xx(8,j) + &
-                               tempz2(i,9,k)*hprimewgll_xx(9,j)
-            enddo
-          enddo
-        enddo
-
-        ! call mxm_m2_m1_9points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
-        do j=1,m1
-          do i=1,m2
-            E1_mxm_m2_m1_9points(i,j) = C1_mxm_m2_m1_9points(i,1)*hprimewgll_xx(1,j) + &
-                                      C1_mxm_m2_m1_9points(i,2)*hprimewgll_xx(2,j) + &
-                                      C1_mxm_m2_m1_9points(i,3)*hprimewgll_xx(3,j) + &
-                                      C1_mxm_m2_m1_9points(i,4)*hprimewgll_xx(4,j) + &
-                                      C1_mxm_m2_m1_9points(i,5)*hprimewgll_xx(5,j) + &
-                                      C1_mxm_m2_m1_9points(i,6)*hprimewgll_xx(6,j) + &
-                                      C1_mxm_m2_m1_9points(i,7)*hprimewgll_xx(7,j) + &
-                                      C1_mxm_m2_m1_9points(i,8)*hprimewgll_xx(8,j) + &
-                                      C1_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
-            E2_mxm_m2_m1_9points(i,j) = C2_mxm_m2_m1_9points(i,1)*hprimewgll_xx(1,j) + &
-                                      C2_mxm_m2_m1_9points(i,2)*hprimewgll_xx(2,j) + &
-                                      C2_mxm_m2_m1_9points(i,3)*hprimewgll_xx(3,j) + &
-                                      C2_mxm_m2_m1_9points(i,4)*hprimewgll_xx(4,j) + &
-                                      C2_mxm_m2_m1_9points(i,5)*hprimewgll_xx(5,j) + &
-                                      C2_mxm_m2_m1_9points(i,6)*hprimewgll_xx(6,j) + &
-                                      C2_mxm_m2_m1_9points(i,7)*hprimewgll_xx(7,j) + &
-                                      C2_mxm_m2_m1_9points(i,8)*hprimewgll_xx(8,j) + &
-                                      C2_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
-            E3_mxm_m2_m1_9points(i,j) = C3_mxm_m2_m1_9points(i,1)*hprimewgll_xx(1,j) + &
-                                      C3_mxm_m2_m1_9points(i,2)*hprimewgll_xx(2,j) + &
-                                      C3_mxm_m2_m1_9points(i,3)*hprimewgll_xx(3,j) + &
-                                      C3_mxm_m2_m1_9points(i,4)*hprimewgll_xx(4,j) + &
-                                      C3_mxm_m2_m1_9points(i,5)*hprimewgll_xx(5,j) + &
-                                      C3_mxm_m2_m1_9points(i,6)*hprimewgll_xx(6,j) + &
-                                      C3_mxm_m2_m1_9points(i,7)*hprimewgll_xx(7,j) + &
-                                      C3_mxm_m2_m1_9points(i,8)*hprimewgll_xx(8,j) + &
-                                      C3_mxm_m2_m1_9points(i,9)*hprimewgll_xx(9,j)
-          enddo
-        enddo
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-
-              fac1 = wgllwgll_yz(j,k)
-              fac2 = wgllwgll_xz(i,k)
-              fac3 = wgllwgll_xy(i,j)
-
-              ! sum contributions from each element to the global mesh using indirect addressing
-              iglob = ibool(i,j,k,ispec)
-              accel(1,iglob) = accel(1,iglob) - fac1*newtempx1(i,j,k) - &
-                                fac2*newtempx2(i,j,k) - fac3*newtempx3(i,j,k)
-              accel(2,iglob) = accel(2,iglob) - fac1*newtempy1(i,j,k) - &
-                                fac2*newtempy2(i,j,k) - fac3*newtempy3(i,j,k)
-              accel(3,iglob) = accel(3,iglob) - fac1*newtempz1(i,j,k) - &
-                                fac2*newtempz2(i,j,k) - fac3*newtempz3(i,j,k)
-
-              !  update memory variables based upon the Runge-Kutta scheme
-              if(ATTENUATION) then
-
-                 ! use Runge-Kutta scheme to march in time
-                 do i_sls = 1,N_SLS
-
-                    factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
-
-                    alphaval_loc = alphaval(i_sls)
-                    betaval_loc = betaval(i_sls)
-                    gammaval_loc = gammaval(i_sls)
-
-                    ! term in xx
-                    Sn   = factor_loc * epsilondev_xx(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xx_loc(i,j,k)
-                    R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yy
-                    Sn   = factor_loc * epsilondev_yy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yy_loc(i,j,k)
-                    R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in zz not computed since zero trace
-                    ! term in xy
-                    Sn   = factor_loc * epsilondev_xy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xy_loc(i,j,k)
-                    R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in xz
-                    Sn   = factor_loc * epsilondev_xz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xz_loc(i,j,k)
-                    R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yz
-                    Sn   = factor_loc * epsilondev_yz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yz_loc(i,j,k)
-                    R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-
-                 enddo   ! end of loop on memory variables
-
-              endif  !  end attenuation
-
-            enddo
-          enddo
-        enddo
-
-        ! save deviatoric strain for Runge-Kutta scheme
-        if ( COMPUTE_AND_STORE_STRAIN ) then
-          epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
-          epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
-          epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
-          epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
-          epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
-        endif
-
-  enddo  ! spectral element loop
-
-end subroutine compute_forces_elastic_Dev_9p
-
-!
-!=====================================================================
-!
-
-subroutine compute_forces_elastic_Dev_10p( iphase ,NSPEC_AB,NGLOB_AB, &
-                                    displ,veloc,accel, &
-                                    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                                    hprime_xx,hprime_xxT, &
-                                    hprimewgll_xx,hprimewgll_xxT, &
-                                    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                                    kappastore,mustore,jacobian,ibool, &
-                                    ATTENUATION,deltat, &
-                                    one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
-                                    NSPEC_ATTENUATION_AB, &
-                                    R_xx,R_yy,R_xy,R_xz,R_yz, &
-                                    epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                                    epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                                    ANISOTROPY,NSPEC_ANISO, &
-                                    c11store,c12store,c13store,c14store,c15store,c16store,&
-                                    c22store,c23store,c24store,c25store,c26store,c33store,&
-                                    c34store,c35store,c36store,c44store,c45store,c46store,&
-                                    c55store,c56store,c66store, &
-                                    SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                                    NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
-                                    is_moho_top,is_moho_bot, &
-                                    dsdx_top,dsdx_bot, &
-                                    ispec2D_moho_top,ispec2D_moho_bot, &
-                                    num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                                    phase_ispec_inner_elastic)
-
-
-! computes elastic tensor term
-
-  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
-                      N_SLS,SAVE_MOHO_MESH, &
-                      ONE_THIRD,FOUR_THIRDS,m1,m2
-  implicit none
-
-  integer :: NSPEC_AB,NGLOB_AB
-
-! displacement, velocity and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,veloc,accel
-
-! time step
-  real(kind=CUSTOM_REAL) :: deltat
-
-! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        kappastore,mustore,jacobian
-
-! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,10) :: hprime_xx,hprimewgll_xxT
-  real(kind=CUSTOM_REAL), dimension(10,NGLLX) :: hprime_xxT,hprimewgll_xx
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! memory variables and standard linear solids for attenuation
-  logical :: ATTENUATION
-  logical :: COMPUTE_AND_STORE_STRAIN
-  integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
-  integer :: NSPEC_ATTENUATION_AB
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
-  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
-      R_xx,R_yy,R_xy,R_xz,R_yz
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
-       epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
-  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
-
-! anisotropy
-  logical :: ANISOTROPY
-  integer :: NSPEC_ANISO
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
-            c11store,c12store,c13store,c14store,c15store,c16store, &
-            c22store,c23store,c24store,c25store,c26store,c33store, &
-            c34store,c35store,c36store,c44store,c45store,c46store, &
-            c55store,c56store,c66store
-
-  integer :: iphase
-  integer :: num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic
-  integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
-
-! adjoint simulations
-  integer :: SIMULATION_TYPE
-  integer :: NSPEC_BOUN,NSPEC2D_MOHO
-
-  ! moho kernel
-  real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
-    dsdx_top,dsdx_bot
-  logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
-  integer :: ispec2D_moho_top, ispec2D_moho_bot
-
-! local parameters
-  real(kind=CUSTOM_REAL), dimension(10,10,10) :: dummyx_loc,dummyy_loc,dummyz_loc, &
-    newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3
-  real(kind=CUSTOM_REAL), dimension(10,10,10) :: &
-    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-
-  ! manually inline the calls to the Deville et al. (2002) routines
-  real(kind=CUSTOM_REAL), dimension(10,100) :: B1_m1_m2_10points,B2_m1_m2_10points,B3_m1_m2_10points
-  real(kind=CUSTOM_REAL), dimension(10,100) :: C1_m1_m2_10points,C2_m1_m2_10points,C3_m1_m2_10points
-  real(kind=CUSTOM_REAL), dimension(10,100) :: E1_m1_m2_10points,E2_m1_m2_10points,E3_m1_m2_10points
-
-  equivalence(dummyx_loc,B1_m1_m2_10points)
-  equivalence(dummyy_loc,B2_m1_m2_10points)
-  equivalence(dummyz_loc,B3_m1_m2_10points)
-  equivalence(tempx1,C1_m1_m2_10points)
-  equivalence(tempy1,C2_m1_m2_10points)
-  equivalence(tempz1,C3_m1_m2_10points)
-  equivalence(newtempx1,E1_m1_m2_10points)
-  equivalence(newtempy1,E2_m1_m2_10points)
-  equivalence(newtempz1,E3_m1_m2_10points)
-
-  real(kind=CUSTOM_REAL), dimension(10,10,10) :: &
-    tempx1_att,tempx2_att,tempx3_att,tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
-  real(kind=CUSTOM_REAL), dimension(10,10,10) :: dummyx_loc_att,dummyy_loc_att,dummyz_loc_att
-  real(kind=CUSTOM_REAL), dimension(10,100) :: B1_m1_m2_10points_att,B2_m1_m2_10points_att,B3_m1_m2_10points_att
-  real(kind=CUSTOM_REAL), dimension(10,100) :: C1_m1_m2_10points_att,C2_m1_m2_10points_att,C3_m1_m2_10points_att
-
-  equivalence(dummyx_loc_att,B1_m1_m2_10points_att)
-  equivalence(dummyy_loc_att,B2_m1_m2_10points_att)
-  equivalence(dummyz_loc_att,B3_m1_m2_10points_att)
-  equivalence(tempx1_att,C1_m1_m2_10points_att)
-  equivalence(tempy1_att,C2_m1_m2_10points_att)
-  equivalence(tempz1_att,C3_m1_m2_10points_att)
-
-  real(kind=CUSTOM_REAL), dimension(100,10) :: &
-    A1_mxm_m2_m1_10points,A2_mxm_m2_m1_10points,A3_mxm_m2_m1_10points
-  real(kind=CUSTOM_REAL), dimension(100,10) :: &
-    C1_mxm_m2_m1_10points,C2_mxm_m2_m1_10points,C3_mxm_m2_m1_10points
-  real(kind=CUSTOM_REAL), dimension(100,10) :: &
-    E1_mxm_m2_m1_10points,E2_mxm_m2_m1_10points,E3_mxm_m2_m1_10points
-
-  equivalence(dummyx_loc,A1_mxm_m2_m1_10points)
-  equivalence(dummyy_loc,A2_mxm_m2_m1_10points)
-  equivalence(dummyz_loc,A3_mxm_m2_m1_10points)
-  equivalence(tempx3,C1_mxm_m2_m1_10points)
-  equivalence(tempy3,C2_mxm_m2_m1_10points)
-  equivalence(tempz3,C3_mxm_m2_m1_10points)
-  equivalence(newtempx3,E1_mxm_m2_m1_10points)
-  equivalence(newtempy3,E2_mxm_m2_m1_10points)
-  equivalence(newtempz3,E3_mxm_m2_m1_10points)
-
-  real(kind=CUSTOM_REAL), dimension(100,10) :: &
-    A1_mxm_m2_m1_10points_att,A2_mxm_m2_m1_10points_att,A3_mxm_m2_m1_10points_att
-  real(kind=CUSTOM_REAL), dimension(100,10) :: &
-    C1_mxm_m2_m1_10points_att,C2_mxm_m2_m1_10points_att,C3_mxm_m2_m1_10points_att
-
-  equivalence(dummyx_loc_att,A1_mxm_m2_m1_10points_att)
-  equivalence(dummyy_loc_att,A2_mxm_m2_m1_10points_att)
-  equivalence(dummyz_loc_att,A3_mxm_m2_m1_10points_att)
-  equivalence(tempx3_att,C1_mxm_m2_m1_10points_att)
-  equivalence(tempy3_att,C2_mxm_m2_m1_10points_att)
-  equivalence(tempz3_att,C3_mxm_m2_m1_10points_att)
-
-  ! local attenuation parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_xx_loc, &
-       epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
-  real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
-  real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
-  real(kind=CUSTOM_REAL) Sn,Snp1
-  real(kind=CUSTOM_REAL) templ
-
-  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
-
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-
-  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
-
-  real(kind=CUSTOM_REAL) fac1,fac2,fac3
-
-  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
-  real(kind=CUSTOM_REAL) kappal
-
-  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
-  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
-
-  ! local anisotropy parameters
-  real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
-                        c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
-
-  integer i_SLS,imodulo_N_SLS
-  integer ispec,iglob,ispec_p,num_elements
-  integer i,j,k
-
-  imodulo_N_SLS = mod(N_SLS,3)
-
-  ! choses inner/outer elements
-  if( iphase == 1 ) then
-    num_elements = nspec_outer_elastic
-  else
-    num_elements = nspec_inner_elastic
-  endif
-
-  do ispec_p = 1,num_elements
-
-        ! returns element id from stored element list
-        ispec = phase_ispec_inner_elastic(ispec_p,iphase)
-
-        ! adjoint simulations: moho kernel
-        if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
-          if (is_moho_top(ispec)) then
-            ispec2D_moho_top = ispec2D_moho_top + 1
-          else if (is_moho_bot(ispec)) then
-            ispec2D_moho_bot = ispec2D_moho_bot + 1
-          endif
-        endif ! adjoint
-
-        ! stores displacment values in local array
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc(i,j,k) = displ(1,iglob)
-                dummyy_loc(i,j,k) = displ(2,iglob)
-                dummyz_loc(i,j,k) = displ(3,iglob)
-            enddo
-          enddo
-        enddo
-
-        ! use first order Taylor expansion of displacement for local storage of stresses 
-        ! at this current time step, to fix attenuation in a consistent way
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           do k=1,NGLLZ
-              do j=1,NGLLY
-                 do i=1,NGLLX
-                    iglob = ibool(i,j,k,ispec)
-                    dummyx_loc_att(i,j,k) = deltat*veloc(1,iglob)
-                    dummyy_loc_att(i,j,k) = deltat*veloc(2,iglob)
-                    dummyz_loc_att(i,j,k) = deltat*veloc(3,iglob)
-                 enddo
-              enddo
-           enddo
-        endif
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_10points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
-        do j=1,m2
-          do i=1,m1
-            C1_m1_m2_10points(i,j) = hprime_xx(i,1)*B1_m1_m2_10points(1,j) + &
-                                  hprime_xx(i,2)*B1_m1_m2_10points(2,j) + &
-                                  hprime_xx(i,3)*B1_m1_m2_10points(3,j) + &
-                                  hprime_xx(i,4)*B1_m1_m2_10points(4,j) + &
-                                  hprime_xx(i,5)*B1_m1_m2_10points(5,j) + &
-                                  hprime_xx(i,6)*B1_m1_m2_10points(6,j) + &
-                                  hprime_xx(i,7)*B1_m1_m2_10points(7,j) + &
-                                  hprime_xx(i,8)*B1_m1_m2_10points(8,j) + &
-                                  hprime_xx(i,9)*B1_m1_m2_10points(9,j) + &
-                                  hprime_xx(i,10)*B1_m1_m2_10points(10,j)
-            C2_m1_m2_10points(i,j) = hprime_xx(i,1)*B2_m1_m2_10points(1,j) + &
-                                  hprime_xx(i,2)*B2_m1_m2_10points(2,j) + &
-                                  hprime_xx(i,3)*B2_m1_m2_10points(3,j) + &
-                                  hprime_xx(i,4)*B2_m1_m2_10points(4,j) + &
-                                  hprime_xx(i,5)*B2_m1_m2_10points(5,j) + &
-                                  hprime_xx(i,6)*B2_m1_m2_10points(6,j) + &
-                                  hprime_xx(i,7)*B2_m1_m2_10points(7,j) + &
-                                  hprime_xx(i,8)*B2_m1_m2_10points(8,j) + &
-                                  hprime_xx(i,9)*B2_m1_m2_10points(9,j) + &
-                                  hprime_xx(i,10)*B2_m1_m2_10points(10,j)
-            C3_m1_m2_10points(i,j) = hprime_xx(i,1)*B3_m1_m2_10points(1,j) + &
-                                  hprime_xx(i,2)*B3_m1_m2_10points(2,j) + &
-                                  hprime_xx(i,3)*B3_m1_m2_10points(3,j) + &
-                                  hprime_xx(i,4)*B3_m1_m2_10points(4,j) + &
-                                  hprime_xx(i,5)*B3_m1_m2_10points(5,j) + &
-                                  hprime_xx(i,6)*B3_m1_m2_10points(6,j) + &
-                                  hprime_xx(i,7)*B3_m1_m2_10points(7,j) + &
-                                  hprime_xx(i,8)*B3_m1_m2_10points(8,j) + &
-                                  hprime_xx(i,9)*B3_m1_m2_10points(9,j) + &
-                                  hprime_xx(i,10)*B3_m1_m2_10points(10,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m2
-              do i=1,m1
-                 C1_m1_m2_10points_att(i,j) = C1_m1_m2_10points(i,j) + & 
-                      hprime_xx(i,1)*B1_m1_m2_10points_att(1,j) + &
-                      hprime_xx(i,2)*B1_m1_m2_10points_att(2,j) + &
-                      hprime_xx(i,3)*B1_m1_m2_10points_att(3,j) + &
-                      hprime_xx(i,4)*B1_m1_m2_10points_att(4,j) + &
-                      hprime_xx(i,5)*B1_m1_m2_10points_att(5,j) + &
-                      hprime_xx(i,6)*B1_m1_m2_10points_att(6,j) + &
-                      hprime_xx(i,7)*B1_m1_m2_10points_att(7,j) + &
-                      hprime_xx(i,8)*B1_m1_m2_10points_att(8,j) + &
-                      hprime_xx(i,9)*B1_m1_m2_10points_att(9,j) + &
-                      hprime_xx(i,10)*B1_m1_m2_10points_att(10,j)
-
-                 C2_m1_m2_10points_att(i,j) = C2_m1_m2_10points(i,j) + &
-                      hprime_xx(i,1)*B2_m1_m2_10points_att(1,j) + &
-                      hprime_xx(i,2)*B2_m1_m2_10points_att(2,j) + &
-                      hprime_xx(i,3)*B2_m1_m2_10points_att(3,j) + &
-                      hprime_xx(i,4)*B2_m1_m2_10points_att(4,j) + &
-                      hprime_xx(i,5)*B2_m1_m2_10points_att(5,j) + &
-                      hprime_xx(i,6)*B2_m1_m2_10points_att(6,j) + &
-                      hprime_xx(i,7)*B2_m1_m2_10points_att(7,j) + &
-                      hprime_xx(i,8)*B2_m1_m2_10points_att(8,j) + &
-                      hprime_xx(i,9)*B2_m1_m2_10points_att(9,j) + &
-                      hprime_xx(i,10)*B2_m1_m2_10points_att(10,j)
-
-                 C3_m1_m2_10points_att(i,j) = C3_m1_m2_10points(i,j) + &
-                      hprime_xx(i,1)*B3_m1_m2_10points_att(1,j) + &
-                      hprime_xx(i,2)*B3_m1_m2_10points_att(2,j) + &
-                      hprime_xx(i,3)*B3_m1_m2_10points_att(3,j) + &
-                      hprime_xx(i,4)*B3_m1_m2_10points_att(4,j) + &
-                      hprime_xx(i,5)*B3_m1_m2_10points_att(5,j) + &
-                      hprime_xx(i,6)*B3_m1_m2_10points_att(6,j) + &
-                      hprime_xx(i,7)*B3_m1_m2_10points_att(7,j) + &
-                      hprime_xx(i,8)*B3_m1_m2_10points_att(8,j) + &
-                      hprime_xx(i,9)*B3_m1_m2_10points_att(9,j) + &
-                      hprime_xx(i,10)*B3_m1_m2_10points_att(10,j)
-              enddo
-           enddo
-        endif
-
-        !   call mxm_m1_m1_10points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
-        !          hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
-        do j=1,m1
-          do i=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              tempx2(i,j,k) = dummyx_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyx_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyx_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyx_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyx_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyx_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyx_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyx_loc(i,8,k)*hprime_xxT(8,j) + &
-                            dummyx_loc(i,9,k)*hprime_xxT(9,j) + &
-                            dummyx_loc(i,10,k)*hprime_xxT(10,j)
-              tempy2(i,j,k) = dummyy_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyy_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyy_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyy_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyy_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyy_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyy_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyy_loc(i,8,k)*hprime_xxT(8,j) + &
-                            dummyy_loc(i,9,k)*hprime_xxT(9,j) + &
-                            dummyy_loc(i,10,k)*hprime_xxT(10,j)
-              tempz2(i,j,k) = dummyz_loc(i,1,k)*hprime_xxT(1,j) + &
-                            dummyz_loc(i,2,k)*hprime_xxT(2,j) + &
-                            dummyz_loc(i,3,k)*hprime_xxT(3,j) + &
-                            dummyz_loc(i,4,k)*hprime_xxT(4,j) + &
-                            dummyz_loc(i,5,k)*hprime_xxT(5,j) + &
-                            dummyz_loc(i,6,k)*hprime_xxT(6,j) + &
-                            dummyz_loc(i,7,k)*hprime_xxT(7,j) + &
-                            dummyz_loc(i,8,k)*hprime_xxT(8,j) + &
-                            dummyz_loc(i,9,k)*hprime_xxT(9,j) + &
-                            dummyz_loc(i,10,k)*hprime_xxT(10,j)
-            enddo
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m1
-                 ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-                 do k = 1,NGLLX
-                    tempx2_att(i,j,k) = tempx2(i,j,k) + &
-                         dummyx_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyx_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyx_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyx_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyx_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyx_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyx_loc_att(i,7,k)*hprime_xxT(7,j) + &
-                         dummyx_loc_att(i,8,k)*hprime_xxT(8,j) + &
-                         dummyx_loc_att(i,9,k)*hprime_xxT(9,j) + &
-                         dummyx_loc_att(i,10,k)*hprime_xxT(10,j)
-
-                    tempy2_att(i,j,k) = tempy2(i,j,k) + &
-                         dummyy_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyy_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyy_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyy_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyy_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyy_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyy_loc_att(i,7,k)*hprime_xxT(7,j) + &
-                         dummyy_loc_att(i,8,k)*hprime_xxT(8,j) + &
-                         dummyy_loc_att(i,9,k)*hprime_xxT(9,j) + &
-                         dummyy_loc_att(i,10,k)*hprime_xxT(10,j)
-
-                    tempz2_att(i,j,k) = tempz2(i,j,k) + &
-                         dummyz_loc_att(i,1,k)*hprime_xxT(1,j) + &
-                         dummyz_loc_att(i,2,k)*hprime_xxT(2,j) + &
-                         dummyz_loc_att(i,3,k)*hprime_xxT(3,j) + &
-                         dummyz_loc_att(i,4,k)*hprime_xxT(4,j) + &
-                         dummyz_loc_att(i,5,k)*hprime_xxT(5,j) + &
-                         dummyz_loc_att(i,6,k)*hprime_xxT(6,j) + &
-                         dummyz_loc_att(i,7,k)*hprime_xxT(7,j) + &
-                         dummyz_loc_att(i,8,k)*hprime_xxT(8,j) + &
-                         dummyz_loc_att(i,9,k)*hprime_xxT(9,j) + &
-                         dummyz_loc_att(i,10,k)*hprime_xxT(10,j)
-                 enddo
-              enddo
-           enddo
-        endif
-
-        ! call mxm_m2_m1_10points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
-        do j=1,m1
-          do i=1,m2
-            C1_mxm_m2_m1_10points(i,j) = A1_mxm_m2_m1_10points(i,1)*hprime_xxT(1,j) + &
-                                      A1_mxm_m2_m1_10points(i,2)*hprime_xxT(2,j) + &
-                                      A1_mxm_m2_m1_10points(i,3)*hprime_xxT(3,j) + &
-                                      A1_mxm_m2_m1_10points(i,4)*hprime_xxT(4,j) + &
-                                      A1_mxm_m2_m1_10points(i,5)*hprime_xxT(5,j) + &
-                                      A1_mxm_m2_m1_10points(i,6)*hprime_xxT(6,j) + &
-                                      A1_mxm_m2_m1_10points(i,7)*hprime_xxT(7,j) + &
-                                      A1_mxm_m2_m1_10points(i,8)*hprime_xxT(8,j) + &
-                                      A1_mxm_m2_m1_10points(i,9)*hprime_xxT(9,j) + &
-                                      A1_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
-            C2_mxm_m2_m1_10points(i,j) = A2_mxm_m2_m1_10points(i,1)*hprime_xxT(1,j) + &
-                                      A2_mxm_m2_m1_10points(i,2)*hprime_xxT(2,j) + &
-                                      A2_mxm_m2_m1_10points(i,3)*hprime_xxT(3,j) + &
-                                      A2_mxm_m2_m1_10points(i,4)*hprime_xxT(4,j) + &
-                                      A2_mxm_m2_m1_10points(i,5)*hprime_xxT(5,j) + &
-                                      A2_mxm_m2_m1_10points(i,6)*hprime_xxT(6,j) + &
-                                      A2_mxm_m2_m1_10points(i,7)*hprime_xxT(7,j) + &
-                                      A2_mxm_m2_m1_10points(i,8)*hprime_xxT(8,j) + &
-                                      A2_mxm_m2_m1_10points(i,9)*hprime_xxT(9,j) + &
-                                      A2_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
-            C3_mxm_m2_m1_10points(i,j) = A3_mxm_m2_m1_10points(i,1)*hprime_xxT(1,j) + &
-                                      A3_mxm_m2_m1_10points(i,2)*hprime_xxT(2,j) + &
-                                      A3_mxm_m2_m1_10points(i,3)*hprime_xxT(3,j) + &
-                                      A3_mxm_m2_m1_10points(i,4)*hprime_xxT(4,j) + &
-                                      A3_mxm_m2_m1_10points(i,5)*hprime_xxT(5,j) + &
-                                      A3_mxm_m2_m1_10points(i,6)*hprime_xxT(6,j) + &
-                                      A3_mxm_m2_m1_10points(i,7)*hprime_xxT(7,j) + &
-                                      A3_mxm_m2_m1_10points(i,8)*hprime_xxT(8,j) + &
-                                      A3_mxm_m2_m1_10points(i,9)*hprime_xxT(9,j) + &
-                                      A3_mxm_m2_m1_10points(i,10)*hprime_xxT(10,j)
-          enddo
-        enddo
-
-        if(ATTENUATION .and. COMPUTE_AND_STORE_STRAIN) then
-           ! temporary variables used for fixing attenuation in a consistent way
-           do j=1,m1
-              do i=1,m2
-                 C1_mxm_m2_m1_10points_att(i,j) = C1_mxm_m2_m1_10points(i,j) + &
-                      A1_mxm_m2_m1_10points_att(i,1)*hprime_xxT(1,j) + &
-                      A1_mxm_m2_m1_10points_att(i,2)*hprime_xxT(2,j) + &
-                      A1_mxm_m2_m1_10points_att(i,3)*hprime_xxT(3,j) + &
-                      A1_mxm_m2_m1_10points_att(i,4)*hprime_xxT(4,j) + &
-                      A1_mxm_m2_m1_10points_att(i,5)*hprime_xxT(5,j) + &
-                      A1_mxm_m2_m1_10points_att(i,6)*hprime_xxT(6,j) + &
-                      A1_mxm_m2_m1_10points_att(i,7)*hprime_xxT(7,j) + &
-                      A1_mxm_m2_m1_10points_att(i,8)*hprime_xxT(8,j) + &
-                      A1_mxm_m2_m1_10points_att(i,9)*hprime_xxT(9,j) + &
-                      A1_mxm_m2_m1_10points_att(i,10)*hprime_xxT(10,j)
-
-                 C2_mxm_m2_m1_10points_att(i,j) = C2_mxm_m2_m1_10points(i,j) + &
-                      A2_mxm_m2_m1_10points_att(i,1)*hprime_xxT(1,j) + &
-                      A2_mxm_m2_m1_10points_att(i,2)*hprime_xxT(2,j) + &
-                      A2_mxm_m2_m1_10points_att(i,3)*hprime_xxT(3,j) + &
-                      A2_mxm_m2_m1_10points_att(i,4)*hprime_xxT(4,j) + &
-                      A2_mxm_m2_m1_10points_att(i,5)*hprime_xxT(5,j) + &
-                      A2_mxm_m2_m1_10points_att(i,6)*hprime_xxT(6,j) + &
-                      A2_mxm_m2_m1_10points_att(i,7)*hprime_xxT(7,j) + &
-                      A2_mxm_m2_m1_10points_att(i,8)*hprime_xxT(8,j) + &
-                      A2_mxm_m2_m1_10points_att(i,9)*hprime_xxT(9,j) + &
-                      A2_mxm_m2_m1_10points_att(i,10)*hprime_xxT(10,j)
-
-                 C3_mxm_m2_m1_10points_att(i,j) = C3_mxm_m2_m1_10points(i,j) + &
-                      A3_mxm_m2_m1_10points_att(i,1)*hprime_xxT(1,j) + &
-                      A3_mxm_m2_m1_10points_att(i,2)*hprime_xxT(2,j) + &
-                      A3_mxm_m2_m1_10points_att(i,3)*hprime_xxT(3,j) + &
-                      A3_mxm_m2_m1_10points_att(i,4)*hprime_xxT(4,j) + &
-                      A3_mxm_m2_m1_10points_att(i,5)*hprime_xxT(5,j) + &
-                      A3_mxm_m2_m1_10points_att(i,6)*hprime_xxT(6,j) + &
-                      A3_mxm_m2_m1_10points_att(i,7)*hprime_xxT(7,j) + &
-                      A3_mxm_m2_m1_10points_att(i,8)*hprime_xxT(8,j) + &
-                      A3_mxm_m2_m1_10points_att(i,9)*hprime_xxT(9,j) + &
-                      A3_mxm_m2_m1_10points_att(i,10)*hprime_xxT(10,j)
-              enddo
-           enddo
-        endif
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-              ! get derivatives of ux, uy and uz with respect to x, y and z
-              xixl = xix(i,j,k,ispec)
-              xiyl = xiy(i,j,k,ispec)
-              xizl = xiz(i,j,k,ispec)
-              etaxl = etax(i,j,k,ispec)
-              etayl = etay(i,j,k,ispec)
-              etazl = etaz(i,j,k,ispec)
-              gammaxl = gammax(i,j,k,ispec)
-              gammayl = gammay(i,j,k,ispec)
-              gammazl = gammaz(i,j,k,ispec)
-              jacobianl = jacobian(i,j,k,ispec)
-
-              duxdxl = xixl*tempx1(i,j,k) + etaxl*tempx2(i,j,k) + gammaxl*tempx3(i,j,k)
-              duxdyl = xiyl*tempx1(i,j,k) + etayl*tempx2(i,j,k) + gammayl*tempx3(i,j,k)
-              duxdzl = xizl*tempx1(i,j,k) + etazl*tempx2(i,j,k) + gammazl*tempx3(i,j,k)
-
-              duydxl = xixl*tempy1(i,j,k) + etaxl*tempy2(i,j,k) + gammaxl*tempy3(i,j,k)
-              duydyl = xiyl*tempy1(i,j,k) + etayl*tempy2(i,j,k) + gammayl*tempy3(i,j,k)
-              duydzl = xizl*tempy1(i,j,k) + etazl*tempy2(i,j,k) + gammazl*tempy3(i,j,k)
-
-              duzdxl = xixl*tempz1(i,j,k) + etaxl*tempz2(i,j,k) + gammaxl*tempz3(i,j,k)
-              duzdyl = xiyl*tempz1(i,j,k) + etayl*tempz2(i,j,k) + gammayl*tempz3(i,j,k)
-              duzdzl = xizl*tempz1(i,j,k) + etazl*tempz2(i,j,k) + gammazl*tempz3(i,j,k)
-
-              ! save strain on the Moho boundary
-              if (SAVE_MOHO_MESH ) then
-                if (is_moho_top(ispec)) then
-                  dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
-                  dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
-                  dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
-                  dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
-                  dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
-                  dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
-                  dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
-                  dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
-                  dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
-                else if (is_moho_bot(ispec)) then
-                  dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
-                  dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
-                  dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
-                  dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
-                  dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
-                  dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
-                  dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
-                  dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
-                  dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
-                endif
-              endif
-
-              ! precompute some sums to save CPU time
-              duxdxl_plus_duydyl = duxdxl + duydyl
-              duxdxl_plus_duzdzl = duxdxl + duzdzl
-              duydyl_plus_duzdzl = duydyl + duzdzl
-              duxdyl_plus_duydxl = duxdyl + duydxl
-              duzdxl_plus_duxdzl = duzdxl + duxdzl
-              duzdyl_plus_duydzl = duzdyl + duydzl
-
-
-              if ( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
-                 ! temporary variables used for fixing attenuation in a consistent way
-                 duxdxl_att = xixl*tempx1_att(i,j,k) + etaxl*tempx2_att(i,j,k) + gammaxl*tempx3_att(i,j,k)
-                 duxdyl_att = xiyl*tempx1_att(i,j,k) + etayl*tempx2_att(i,j,k) + gammayl*tempx3_att(i,j,k)
-                 duxdzl_att = xizl*tempx1_att(i,j,k) + etazl*tempx2_att(i,j,k) + gammazl*tempx3_att(i,j,k)
-
-                 duydxl_att = xixl*tempy1_att(i,j,k) + etaxl*tempy2_att(i,j,k) + gammaxl*tempy3_att(i,j,k)
-                 duydyl_att = xiyl*tempy1_att(i,j,k) + etayl*tempy2_att(i,j,k) + gammayl*tempy3_att(i,j,k)
-                 duydzl_att = xizl*tempy1_att(i,j,k) + etazl*tempy2_att(i,j,k) + gammazl*tempy3_att(i,j,k)
-
-                 duzdxl_att = xixl*tempz1_att(i,j,k) + etaxl*tempz2_att(i,j,k) + gammaxl*tempz3_att(i,j,k)
-                 duzdyl_att = xiyl*tempz1_att(i,j,k) + etayl*tempz2_att(i,j,k) + gammayl*tempz3_att(i,j,k)
-                 duzdzl_att = xizl*tempz1_att(i,j,k) + etazl*tempz2_att(i,j,k) + gammazl*tempz3_att(i,j,k)
-
-                 ! precompute some sums to save CPU time
-                 duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
-                 duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
-                 duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
-
-                 ! compute deviatoric strain
-                 templ = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
-                 if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                 epsilondev_xx_loc(i,j,k) = duxdxl_att - templ
-                 epsilondev_yy_loc(i,j,k) = duydyl_att - templ
-                 epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
-                 epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
-                 epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
-              else
-                 ! computes deviatoric strain attenuation and/or for kernel calculations
-                 if (COMPUTE_AND_STORE_STRAIN) then
-                    templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-                    if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                    epsilondev_xx_loc(i,j,k) = duxdxl - templ
-                    epsilondev_yy_loc(i,j,k) = duydyl - templ
-                    epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
-                    epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
-                    epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
-                 endif
-              endif
-
-              kappal = kappastore(i,j,k,ispec)
-              mul = mustore(i,j,k,ispec)
-
-              ! attenuation
-              if(ATTENUATION) then
-                ! use unrelaxed parameters if attenuation
-                mul  = mul * one_minus_sum_beta(i,j,k,ispec)
-              endif
-
-  ! full anisotropic case, stress calculations
-              if(ANISOTROPY) then
-                c11 = c11store(i,j,k,ispec)
-                c12 = c12store(i,j,k,ispec)
-                c13 = c13store(i,j,k,ispec)
-                c14 = c14store(i,j,k,ispec)
-                c15 = c15store(i,j,k,ispec)
-                c16 = c16store(i,j,k,ispec)
-                c22 = c22store(i,j,k,ispec)
-                c23 = c23store(i,j,k,ispec)
-                c24 = c24store(i,j,k,ispec)
-                c25 = c25store(i,j,k,ispec)
-                c26 = c26store(i,j,k,ispec)
-                c33 = c33store(i,j,k,ispec)
-                c34 = c34store(i,j,k,ispec)
-                c35 = c35store(i,j,k,ispec)
-                c36 = c36store(i,j,k,ispec)
-                c44 = c44store(i,j,k,ispec)
-                c45 = c45store(i,j,k,ispec)
-                c46 = c46store(i,j,k,ispec)
-                c55 = c55store(i,j,k,ispec)
-                c56 = c56store(i,j,k,ispec)
-                c66 = c66store(i,j,k,ispec)
-
-                sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
-                          c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
-                sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
-                          c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
-                sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
-                          c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
-                sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
-                          c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
-                sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
-                          c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
-                sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
-                          c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
-
-              else
-
-  ! isotropic case
-                lambdalplus2mul = kappal + FOUR_THIRDS * mul
-                lambdal = lambdalplus2mul - 2.*mul
-
-                ! compute stress sigma
-                sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
-                sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
-                sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
-
-                sigma_xy = mul*duxdyl_plus_duydxl
-                sigma_xz = mul*duzdxl_plus_duxdzl
-                sigma_yz = mul*duzdyl_plus_duydzl
-
-              endif ! ANISOTROPY
-
-              ! subtract memory variables if attenuation
-              if(ATTENUATION) then
-! way 1
-!                do i_sls = 1,N_SLS
-!                  R_xx_val = R_xx(i,j,k,ispec,i_sls)
-!                  R_yy_val = R_yy(i,j,k,ispec,i_sls)
-!                  sigma_xx = sigma_xx - R_xx_val
-!                  sigma_yy = sigma_yy - R_yy_val
-!                  sigma_zz = sigma_zz + R_xx_val + R_yy_val
-!                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-!                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-!                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-!                enddo
-
-! way 2
-! note: this should help compilers to pipeline the code and make better use of the cache;
-!          depending on compilers, it can further decrease the computation time by ~ 30%.
-!          by default, N_SLS = 3, therefore we take steps of 3
-              if(imodulo_N_SLS >= 1) then
-                do i_sls = 1,imodulo_N_SLS
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1
-                  sigma_yy = sigma_yy - R_yy_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-                enddo
-              endif
-
-              if(N_SLS >= imodulo_N_SLS+1) then
-                do i_sls = imodulo_N_SLS+1,N_SLS,3
-                  R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                  R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                  sigma_xx = sigma_xx - R_xx_val1
-                  sigma_yy = sigma_yy - R_yy_val1
-                  sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-
-                  R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
-                  R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
-                  sigma_xx = sigma_xx - R_xx_val2
-                  sigma_yy = sigma_yy - R_yy_val2
-                  sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
-
-                  R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
-                  R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
-                  sigma_xx = sigma_xx - R_xx_val3
-                  sigma_yy = sigma_yy - R_yy_val3
-                  sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
-                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
-                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
-                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
-                enddo
-              endif
-
-
-              endif
-
-            ! define symmetric components of sigma
-            sigma_yx = sigma_xy
-            sigma_zx = sigma_xz
-            sigma_zy = sigma_yz
-
-            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
-            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
-            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
-            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
-            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
-            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
-            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
-            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
-            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
-            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
-            enddo
-          enddo
-        enddo
-
-    ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-    ! for incompressible fluid flow, Cambridge University Press (2002),
-    ! pages 386 and 389 and Figure 8.3.1
-        ! call mxm_m1_m2_10points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
-        do j=1,m2
-          do i=1,m1
-            E1_m1_m2_10points(i,j) = hprimewgll_xxT(i,1)*C1_m1_m2_10points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C1_m1_m2_10points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C1_m1_m2_10points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C1_m1_m2_10points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C1_m1_m2_10points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C1_m1_m2_10points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C1_m1_m2_10points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C1_m1_m2_10points(8,j) + &
-                                  hprimewgll_xxT(i,9)*C1_m1_m2_10points(9,j) + &
-                                  hprimewgll_xxT(i,10)*C1_m1_m2_10points(10,j)
-            E2_m1_m2_10points(i,j) = hprimewgll_xxT(i,1)*C2_m1_m2_10points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C2_m1_m2_10points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C2_m1_m2_10points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C2_m1_m2_10points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C2_m1_m2_10points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C2_m1_m2_10points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C2_m1_m2_10points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C2_m1_m2_10points(8,j) + &
-                                  hprimewgll_xxT(i,9)*C2_m1_m2_10points(9,j) + &
-                                  hprimewgll_xxT(i,10)*C2_m1_m2_10points(10,j)
-            E3_m1_m2_10points(i,j) = hprimewgll_xxT(i,1)*C3_m1_m2_10points(1,j) + &
-                                  hprimewgll_xxT(i,2)*C3_m1_m2_10points(2,j) + &
-                                  hprimewgll_xxT(i,3)*C3_m1_m2_10points(3,j) + &
-                                  hprimewgll_xxT(i,4)*C3_m1_m2_10points(4,j) + &
-                                  hprimewgll_xxT(i,5)*C3_m1_m2_10points(5,j) + &
-                                  hprimewgll_xxT(i,6)*C3_m1_m2_10points(6,j) + &
-                                  hprimewgll_xxT(i,7)*C3_m1_m2_10points(7,j) + &
-                                  hprimewgll_xxT(i,8)*C3_m1_m2_10points(8,j) + &
-                                  hprimewgll_xxT(i,9)*C3_m1_m2_10points(9,j) + &
-                                  hprimewgll_xxT(i,10)*C3_m1_m2_10points(10,j)
-          enddo
-        enddo
-
-        !   call mxm_m1_m1_10points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
-        !         hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
-        do i=1,m1
-          do j=1,m1
-            ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-            do k = 1,NGLLX
-              newtempx2(i,j,k) = tempx2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempx2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempx2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempx2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempx2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempx2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempx2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempx2(i,8,k)*hprimewgll_xx(8,j) + &
-                               tempx2(i,9,k)*hprimewgll_xx(9,j) + &
-                               tempx2(i,10,k)*hprimewgll_xx(10,j)
-              newtempy2(i,j,k) = tempy2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempy2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempy2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempy2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempy2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempy2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempy2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempy2(i,8,k)*hprimewgll_xx(8,j) + &
-                               tempy2(i,9,k)*hprimewgll_xx(9,j) + &
-                               tempy2(i,10,k)*hprimewgll_xx(10,j)
-              newtempz2(i,j,k) = tempz2(i,1,k)*hprimewgll_xx(1,j) + &
-                               tempz2(i,2,k)*hprimewgll_xx(2,j) + &
-                               tempz2(i,3,k)*hprimewgll_xx(3,j) + &
-                               tempz2(i,4,k)*hprimewgll_xx(4,j) + &
-                               tempz2(i,5,k)*hprimewgll_xx(5,j) + &
-                               tempz2(i,6,k)*hprimewgll_xx(6,j) + &
-                               tempz2(i,7,k)*hprimewgll_xx(7,j) + &
-                               tempz2(i,8,k)*hprimewgll_xx(8,j) + &
-                               tempz2(i,9,k)*hprimewgll_xx(9,j) + &
-                               tempz2(i,10,k)*hprimewgll_xx(10,j)
-            enddo
-          enddo
-        enddo
-
-        ! call mxm_m2_m1_10points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
-        do j=1,m1
-          do i=1,m2
-            E1_mxm_m2_m1_10points(i,j) = C1_mxm_m2_m1_10points(i,1)*hprimewgll_xx(1,j) + &
-                                      C1_mxm_m2_m1_10points(i,2)*hprimewgll_xx(2,j) + &
-                                      C1_mxm_m2_m1_10points(i,3)*hprimewgll_xx(3,j) + &
-                                      C1_mxm_m2_m1_10points(i,4)*hprimewgll_xx(4,j) + &
-                                      C1_mxm_m2_m1_10points(i,5)*hprimewgll_xx(5,j) + &
-                                      C1_mxm_m2_m1_10points(i,6)*hprimewgll_xx(6,j) + &
-                                      C1_mxm_m2_m1_10points(i,7)*hprimewgll_xx(7,j) + &
-                                      C1_mxm_m2_m1_10points(i,8)*hprimewgll_xx(8,j) + &
-                                      C1_mxm_m2_m1_10points(i,9)*hprimewgll_xx(9,j) + &
-                                      C1_mxm_m2_m1_10points(i,10)*hprimewgll_xx(10,j)
-            E2_mxm_m2_m1_10points(i,j) = C2_mxm_m2_m1_10points(i,1)*hprimewgll_xx(1,j) + &
-                                      C2_mxm_m2_m1_10points(i,2)*hprimewgll_xx(2,j) + &
-                                      C2_mxm_m2_m1_10points(i,3)*hprimewgll_xx(3,j) + &
-                                      C2_mxm_m2_m1_10points(i,4)*hprimewgll_xx(4,j) + &
-                                      C2_mxm_m2_m1_10points(i,5)*hprimewgll_xx(5,j) + &
-                                      C2_mxm_m2_m1_10points(i,6)*hprimewgll_xx(6,j) + &
-                                      C2_mxm_m2_m1_10points(i,7)*hprimewgll_xx(7,j) + &
-                                      C2_mxm_m2_m1_10points(i,8)*hprimewgll_xx(8,j) + &
-                                      C2_mxm_m2_m1_10points(i,9)*hprimewgll_xx(9,j) + &
-                                      C2_mxm_m2_m1_10points(i,10)*hprimewgll_xx(10,j)
-            E3_mxm_m2_m1_10points(i,j) = C3_mxm_m2_m1_10points(i,1)*hprimewgll_xx(1,j) + &
-                                      C3_mxm_m2_m1_10points(i,2)*hprimewgll_xx(2,j) + &
-                                      C3_mxm_m2_m1_10points(i,3)*hprimewgll_xx(3,j) + &
-                                      C3_mxm_m2_m1_10points(i,4)*hprimewgll_xx(4,j) + &
-                                      C3_mxm_m2_m1_10points(i,5)*hprimewgll_xx(5,j) + &
-                                      C3_mxm_m2_m1_10points(i,6)*hprimewgll_xx(6,j) + &
-                                      C3_mxm_m2_m1_10points(i,7)*hprimewgll_xx(7,j) + &
-                                      C3_mxm_m2_m1_10points(i,8)*hprimewgll_xx(8,j) + &
-                                      C3_mxm_m2_m1_10points(i,9)*hprimewgll_xx(9,j) + &
-                                      C3_mxm_m2_m1_10points(i,10)*hprimewgll_xx(10,j)
-          enddo
-        enddo
-
-        do k=1,NGLLZ
-          do j=1,NGLLY
-            do i=1,NGLLX
-
-              fac1 = wgllwgll_yz(j,k)
-              fac2 = wgllwgll_xz(i,k)
-              fac3 = wgllwgll_xy(i,j)
-
-              ! sum contributions from each element to the global mesh using indirect addressing
-              iglob = ibool(i,j,k,ispec)
-              accel(1,iglob) = accel(1,iglob) - fac1*newtempx1(i,j,k) - &
-                                fac2*newtempx2(i,j,k) - fac3*newtempx3(i,j,k)
-              accel(2,iglob) = accel(2,iglob) - fac1*newtempy1(i,j,k) - &
-                                fac2*newtempy2(i,j,k) - fac3*newtempy3(i,j,k)
-              accel(3,iglob) = accel(3,iglob) - fac1*newtempz1(i,j,k) - &
-                                fac2*newtempz2(i,j,k) - fac3*newtempz3(i,j,k)
-
-              !  update memory variables based upon the Runge-Kutta scheme
-              if(ATTENUATION) then
-
-                 ! use Runge-Kutta scheme to march in time
-                 do i_sls = 1,N_SLS
-
-                    factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
-
-                    alphaval_loc = alphaval(i_sls)
-                    betaval_loc = betaval(i_sls)
-                    gammaval_loc = gammaval(i_sls)
-
-                    ! term in xx
-                    Sn   = factor_loc * epsilondev_xx(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xx_loc(i,j,k)
-                    R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yy
-                    Sn   = factor_loc * epsilondev_yy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yy_loc(i,j,k)
-                    R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in zz not computed since zero trace
-                    ! term in xy
-                    Sn   = factor_loc * epsilondev_xy(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xy_loc(i,j,k)
-                    R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in xz
-                    Sn   = factor_loc * epsilondev_xz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_xz_loc(i,j,k)
-                    R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-                    ! term in yz
-                    Sn   = factor_loc * epsilondev_yz(i,j,k,ispec)
-                    Snp1   = factor_loc * epsilondev_yz_loc(i,j,k)
-                    R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
-                                      betaval_loc * Sn + gammaval_loc * Snp1
-
-                 enddo   ! end of loop on memory variables
-
-              endif  !  end attenuation
-
-            enddo
-          enddo
-        enddo
-
-        ! save deviatoric strain for Runge-Kutta scheme
-        if ( COMPUTE_AND_STORE_STRAIN ) then
-          epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
-          epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
-          epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
-          epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
-          epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
-        endif
-
-  enddo  ! spectral element loop
-
-end subroutine compute_forces_elastic_Dev_10p



More information about the CIG-COMMITS mailing list