[cig-commits] r21239 - seismo/3D/SPECFEM3D/trunk/src/specfem3D

Wed Jan 16 10:04:01 PST 2013

Author: dkomati1
Date: 2013-01-16 10:04:01 -0800 (Wed, 16 Jan 2013)
New Revision: 21239

Added:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_pot_noDev.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_calling_routine.F90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_fluid_for_poro.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_poro_solid_part.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_compute_forces_elastic_Dev_openmp.f90
Removed:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_pot.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_fluid.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_solid.f90
Modified:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_poroelastic.f90
Log:
renamed several files for clarity, to avoid confusion between calling programs and actual calculation subroutines


Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in
===================================================================

--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in	2013-01-16 17:48:05 UTC (rev 21238)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/Makefile.in	2013-01-16 18:04:01 UTC (rev 21239)
@@ -188,15 +188,15 @@
 	$O/compute_coupling_elastic_po.o \
 	$O/compute_coupling_poroelastic_ac.o \
 	$O/compute_coupling_poroelastic_el.o \
-	$O/compute_forces_acoustic.o \
+	$O/compute_forces_acoustic_calling_routine.o \
 	$O/compute_forces_acoustic_PML.o \
-	$O/compute_forces_acoustic_pot.o \
-	$O/compute_forces_elastic.o \
+	$O/compute_forces_acoustic_pot_noDev.o \
+	$O/compute_forces_elastic_calling_routine.o \
 	$O/compute_forces_elastic_Dev.o \
 	$O/compute_forces_elastic_noDev.o \
-	$O/compute_forces_fluid.o \
+	$O/compute_forces_poro_fluid_part.o \
 	$O/compute_forces_poroelastic.o \
-	$O/compute_forces_solid.o \
+	$O/compute_forces_poro_solid_part.o \
 	$O/compute_gradient.o \
 	$O/compute_interpolated_dva.o \
 	$O/compute_kernels.o \
@@ -264,7 +264,7 @@
 @COND_MPI_FALSE at COND_MPI_OBJECTS = $O/serial.o
 
 # objects toggled between openmp and non-openmp version
- at COND_OPENMP_TRUE@COND_OPENMP_OBJECTS = $O/compute_forces_elastic_Dev_openmp.openmp.o
+ at COND_OPENMP_TRUE@COND_OPENMP_OBJECTS = $O/older_compute_forces_elastic_Dev_openmp.openmp.o
 @COND_OPENMP_FALSE at COND_OPENMP_OBJECTS =
 
 LIBSPECFEM = $L/libspecfem.a

Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic.f90	2013-01-16 17:48:05 UTC (rev 21238)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -1,443 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 1
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-!                             July 2012
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! acoustic solver
-
-! in case of an acoustic medium, a potential Chi of (density * displacement) is used as in Chaljub and Valette,
-! Geophysical Journal International, vol. 158, p. 131-141 (2004) and *NOT* a velocity potential
-! as in Komatitsch and Tromp, Geophysical Journal International, vol. 150, p. 303-318 (2002).
-!
-! This permits acoustic-elastic coupling based on a non-iterative time scheme.
-! Displacement is then:
-!     u = grad(Chi) / rho
-! Velocity is then:
-!     v = grad(Chi_dot) / rho
-! (Chi_dot being the time derivative of Chi)
-! and pressure is:
-!     p = - Chi_dot_dot
-! (Chi_dot_dot being the time second derivative of Chi).
-!
-! The source in an acoustic element is a pressure source.
-!
-! First-order acoustic-acoustic discontinuities are also handled automatically
-! because pressure is continuous at such an interface, therefore Chi_dot_dot
-! is continuous, therefore Chi is also continuous, which is consistent with
-! the spectral-element basis functions and with the assembling process.
-! This is the reason why a simple displacement potential u = grad(Chi) would
-! not work because it would be discontinuous at such an interface and would
-! therefore not be consistent with the basis functions.
-
-
-subroutine compute_forces_acoustic()
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-
-  implicit none
-
-  ! local parameters
-  integer:: iphase
-  logical:: phase_is_inner
-
-  ! enforces free surface (zeroes potentials at free surface)
-  if(.NOT. GPU_MODE) then
-    ! on CPU
-    call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB,ABSORB_INSTEAD_OF_FREE_SURFACE, &
-                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
-                        ibool,free_surface_ijk,free_surface_ispec, &
-                        num_free_surface_faces,ispec_is_acoustic)
-
-    ! adjoint simulations
-    if( SIMULATION_TYPE == 3 ) &
-      call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT,ABSORB_INSTEAD_OF_FREE_SURFACE, &
-                        b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
-                        ibool,free_surface_ijk,free_surface_ispec, &
-                        num_free_surface_faces,ispec_is_acoustic)
-  else
-    ! on GPU
-    call acoustic_enforce_free_surf_cuda(Mesh_pointer,ABSORB_INSTEAD_OF_FREE_SURFACE)
-  endif
-
-  ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions
-  do iphase=1,2
-
-    !first for points on MPI interfaces, thus outer elements
-    if( iphase == 1 ) then
-      phase_is_inner = .false.
-    else
-      phase_is_inner = .true.
-    endif
-
-    ! acoustic pressure term
-    if(.NOT. GPU_MODE) then
-      ! on CPU
-      call compute_forces_acoustic_pot( iphase, NSPEC_AB,NGLOB_AB, &
-                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_yy,hprime_zz, &
-                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        rhostore,jacobian,ibool,deltat, &
-                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
-                        ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
-                        num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
-                        phase_ispec_inner_acoustic )
-
-      ! adjoint simulations
-      if( SIMULATION_TYPE == 3 ) &
-        call compute_forces_acoustic_pot( iphase, NSPEC_ADJOINT,NGLOB_ADJOINT, &
-                        b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_yy,hprime_zz, &
-                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        rhostore,jacobian,ibool,deltat, &
-                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
-                        ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
-                        num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
-                        phase_ispec_inner_acoustic )
-    else
-      ! on GPU
-      ! includes code for SIMULATION_TYPE==3
-      call compute_forces_acoustic_cuda(Mesh_pointer, iphase, &
-                                        nspec_outer_acoustic, nspec_inner_acoustic)
-    endif
-
-    ! ! Stacey absorbing boundary conditions
-    if(ABSORBING_CONDITIONS) then
-       call compute_stacey_acoustic(NSPEC_AB,NGLOB_AB, &
-                         potential_dot_dot_acoustic,potential_dot_acoustic, &
-                         ibool,ispec_is_inner,phase_is_inner, &
-                         abs_boundary_jacobian2Dw,abs_boundary_ijk,abs_boundary_ispec, &
-                         num_abs_boundary_faces,rhostore,kappastore,ispec_is_acoustic, &
-                         SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it,NGLOB_ADJOINT, &
-                         b_potential_dot_dot_acoustic,b_reclen_potential, &
-                         b_absorb_potential,b_num_abs_boundary_faces, &
-                         GPU_MODE,Mesh_pointer)
-    endif
-
-    ! elastic coupling
-    if(ELASTIC_SIMULATION ) then
-      if( num_coupling_ac_el_faces > 0 ) then
-        if( .NOT. GPU_MODE ) then
-          if( SIMULATION_TYPE == 1 ) then
-            ! forward definition: \bfs=\frac{1}{\rho}\bfnabla\phi
-            call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
-                              ibool,displ,potential_dot_dot_acoustic, &
-                              num_coupling_ac_el_faces, &
-                              coupling_ac_el_ispec,coupling_ac_el_ijk, &
-                              coupling_ac_el_normal, &
-                              coupling_ac_el_jacobian2Dw, &
-                              ispec_is_inner,phase_is_inner)
-          else
-            ! handles adjoint runs coupling between adjoint potential and adjoint elastic wavefield
-            ! adjoint definition: \partial_t^2 \bfs^\dagger=-\frac{1}{\rho}\bfnabla\phi^\dagger
-            call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
-                              ibool,-accel_adj_coupling,potential_dot_dot_acoustic, &
-                              num_coupling_ac_el_faces, &
-                              coupling_ac_el_ispec,coupling_ac_el_ijk, &
-                              coupling_ac_el_normal, &
-                              coupling_ac_el_jacobian2Dw, &
-                              ispec_is_inner,phase_is_inner)
-          endif
-          ! adjoint/kernel simulations
-          if( SIMULATION_TYPE == 3 ) &
-            call compute_coupling_acoustic_el(NSPEC_ADJOINT,NGLOB_ADJOINT, &
-                            ibool,b_displ,b_potential_dot_dot_acoustic, &
-                            num_coupling_ac_el_faces, &
-                            coupling_ac_el_ispec,coupling_ac_el_ijk, &
-                            coupling_ac_el_normal, &
-                            coupling_ac_el_jacobian2Dw, &
-                            ispec_is_inner,phase_is_inner)
-
-        else
-          ! on GPU
-          call compute_coupling_ac_el_cuda(Mesh_pointer,phase_is_inner, &
-                                              num_coupling_ac_el_faces)
-        endif ! GPU_MODE
-      endif
-    endif
-
-! poroelastic coupling
-    if(POROELASTIC_SIMULATION )  then
-      if( num_coupling_ac_po_faces > 0 ) then
-        if( SIMULATION_TYPE == 1 ) then
-          call compute_coupling_acoustic_po(NSPEC_AB,NGLOB_AB, &
-                        ibool,displs_poroelastic,displw_poroelastic, &
-                        potential_dot_dot_acoustic, &
-                        num_coupling_ac_po_faces, &
-                        coupling_ac_po_ispec,coupling_ac_po_ijk, &
-                        coupling_ac_po_normal, &
-                        coupling_ac_po_jacobian2Dw, &
-                        ispec_is_inner,phase_is_inner)
-        else
-          stop 'not implemented yet'
-        endif
-        if( SIMULATION_TYPE == 3 ) &
-          stop 'not implemented yet'
-      endif
-    endif
-
-    ! sources
-    call compute_add_sources_acoustic(NSPEC_AB,NGLOB_AB,potential_dot_dot_acoustic, &
-                        ibool,ispec_is_inner,phase_is_inner, &
-                        NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,&
-                        xi_source,eta_source,gamma_source, &
-                        hdur,hdur_gaussian,tshift_src,dt,t0, &
-                        sourcearrays,kappastore,ispec_is_acoustic,&
-                        SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
-                        nrec,islice_selected_rec,ispec_selected_rec, &
-                        nadj_rec_local,adj_sourcearrays,b_potential_dot_dot_acoustic, &
-                        NTSTEP_BETWEEN_READ_ADJSRC, &
-                        GPU_MODE, Mesh_pointer)
-
-    ! assemble all the contributions between slices using MPI
-    if( phase_is_inner .eqv. .false. ) then
-      ! sends potential_dot_dot_acoustic values to corresponding MPI interface neighbors (non-blocking)
-      if(.NOT. GPU_MODE) then
-        call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
-                        buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
-                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
-                        my_neighbours_ext_mesh, &
-                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
-      else
-        ! on GPU
-        call transfer_boun_pot_from_device(NGLOB_AB, Mesh_pointer, &
-                                            potential_dot_dot_acoustic, &
-                                            buffer_send_scalar_ext_mesh, &
-                                            num_interfaces_ext_mesh, &
-                                            max_nibool_interfaces_ext_mesh, &
-                                            nibool_interfaces_ext_mesh, &
-                                            ibool_interfaces_ext_mesh, &
-                                            1) ! <-- 1 == fwd accel
-        call assemble_MPI_scalar_send_cuda(NPROC, &
-                        buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
-                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,&
-                        my_neighbours_ext_mesh, &
-                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
-      endif
-
-      ! adjoint simulations
-      if( SIMULATION_TYPE == 3 ) then
-        if(.NOT. GPU_MODE) then
-          call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
-                        b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, &
-                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
-                        my_neighbours_ext_mesh, &
-                        b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
-        else
-          ! on GPU
-          call transfer_boun_pot_from_device(NGLOB_AB, Mesh_pointer, &
-                                                  b_potential_dot_dot_acoustic, &
-                                                  b_buffer_send_scalar_ext_mesh,&
-                                                  num_interfaces_ext_mesh, &
-                                                  max_nibool_interfaces_ext_mesh, &
-                                                  nibool_interfaces_ext_mesh, &
-                                                  ibool_interfaces_ext_mesh, &
-                                                  3) ! <-- 3 == adjoint b_accel
-
-          call assemble_MPI_scalar_send_cuda(NPROC, &
-                          b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, &
-                          num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                          nibool_interfaces_ext_mesh,&
-                          my_neighbours_ext_mesh, &
-                          b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
-
-        endif
-      endif
-
-    else
-
-      ! waits for send/receive requests to be completed and assembles values
-      if(.NOT. GPU_MODE) then
-        call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
-                        buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
-                        max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
-      else
-        ! on GPU
-        call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
-                        Mesh_pointer,&
-                        buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
-                        max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
-                        1)
-      endif
-
-      ! adjoint simulations
-      if( SIMULATION_TYPE == 3 ) then
-        if(.NOT. GPU_MODE) then
-          call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
-                        b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
-                        max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                        b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
-        else
-          ! on GPU
-          call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, &
-                        Mesh_pointer, &
-                        b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
-                        max_nibool_interfaces_ext_mesh, &
-                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                        b_request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
-                        3)
-        endif
-      endif
-    endif !phase_is_inner
-
-  enddo
-
-  if(.NOT. GPU_MODE) then
-    ! divides pressure with mass matrix
-    potential_dot_dot_acoustic(:) = potential_dot_dot_acoustic(:) * rmass_acoustic(:)
-
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) &
-      b_potential_dot_dot_acoustic(:) = b_potential_dot_dot_acoustic(:) * rmass_acoustic(:)
-  else
-    ! on GPU
-    call kernel_3_a_acoustic_cuda(Mesh_pointer,NGLOB_AB)
-  endif
-
-! update velocity
-! note: Newmark finite-difference time scheme with acoustic domains:
-! (see e.g. Hughes, 1987; Chaljub et al., 2003)
-!
-! chi(t+delta_t) = chi(t) + delta_t chi_dot(t) + 1/2 delta_t**2 chi_dot_dot(t)
-! chi_dot(t+delta_t) = chi_dot(t) + 1/2 delta_t chi_dot_dot(t) + 1/2 DELTA_T CHI_DOT_DOT( T + DELTA_T )
-! chi_dot_dot(t+delta_t) = 1/M_acoustic( -K_acoustic chi(t+delta) + B_acoustic u(t+delta_t) + f(t+delta_t) )
-!
-! where
-!   chi, chi_dot, chi_dot_dot are acoustic (fluid) potentials ( dotted with respect to time)
-!   u, v, a are displacement,velocity & acceleration
-!   M is mass matrix, K stiffness matrix and B boundary term
-!   f denotes a source term
-!
-! corrector:
-!   updates the chi_dot term which requires chi_dot_dot(t+delta)
- if( .NOT. GPU_MODE ) then
-    ! corrector
-    potential_dot_acoustic(:) = potential_dot_acoustic(:) + deltatover2*potential_dot_dot_acoustic(:)
-
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) &
-      b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) + b_deltatover2*b_potential_dot_dot_acoustic(:)
-  else
-    ! on GPU
-    call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,b_deltatover2)
-  endif
-
-! enforces free surface (zeroes potentials at free surface)
-  if(.NOT. GPU_MODE) then
-    ! on CPU
-    call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB,ABSORB_INSTEAD_OF_FREE_SURFACE, &
-                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
-                        ibool,free_surface_ijk,free_surface_ispec, &
-                        num_free_surface_faces,ispec_is_acoustic)
-
-    if( SIMULATION_TYPE /= 1 ) then
-      potential_acoustic_adj_coupling(:) = potential_acoustic(:) &
-                            + deltat * potential_dot_acoustic(:) &
-                            + deltatsqover2 * potential_dot_dot_acoustic(:)
-    endif
-
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) &
-      call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT,ABSORB_INSTEAD_OF_FREE_SURFACE, &
-                        b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
-                        ibool,free_surface_ijk,free_surface_ispec, &
-                        num_free_surface_faces,ispec_is_acoustic)
-  else
-    ! on GPU
-    call acoustic_enforce_free_surf_cuda(Mesh_pointer,ABSORB_INSTEAD_OF_FREE_SURFACE)
-  endif
-
-end subroutine compute_forces_acoustic
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-subroutine acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB,ABSORB_INSTEAD_OF_FREE_SURFACE, &
-                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
-                        ibool,free_surface_ijk,free_surface_ispec, &
-                        num_free_surface_faces,ispec_is_acoustic)
-  implicit none
-  include 'constants.h'
-
-  integer :: NSPEC_AB,NGLOB_AB
-  logical :: ABSORB_INSTEAD_OF_FREE_SURFACE
-
-! acoustic potentials
-  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: &
-        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic
-
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-
-! free surface
-  integer :: num_free_surface_faces
-  integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
-  integer :: free_surface_ispec(num_free_surface_faces)
-
-  logical, dimension(NSPEC_AB) :: ispec_is_acoustic
-
-! local parameters
-  integer :: iface,igll,i,j,k,ispec,iglob
-
-  ! checks if free surface became an absorbing boundary
-  if( ABSORB_INSTEAD_OF_FREE_SURFACE ) return
-
-! enforce potentials to be zero at surface
-  do iface = 1, num_free_surface_faces
-
-    ispec = free_surface_ispec(iface)
-
-    if( ispec_is_acoustic(ispec) ) then
-
-      do igll = 1, NGLLSQUARE
-        i = free_surface_ijk(1,igll,iface)
-        j = free_surface_ijk(2,igll,iface)
-        k = free_surface_ijk(3,igll,iface)
-        iglob = ibool(i,j,k,ispec)
-
-        ! sets potentials to zero
-        potential_acoustic(iglob)         = 0._CUSTOM_REAL
-        potential_dot_acoustic(iglob)     = 0._CUSTOM_REAL
-        potential_dot_dot_acoustic(iglob) = 0._CUSTOM_REAL
-      enddo
-    endif
-
-  enddo
-
-end subroutine acoustic_enforce_free_surface
-

Copied: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90 (from rev 21237, seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic.f90)
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_calling_routine.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -0,0 +1,443 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 1
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+!                             July 2012
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! acoustic solver
+
+! in case of an acoustic medium, a potential Chi of (density * displacement) is used as in Chaljub and Valette,
+! Geophysical Journal International, vol. 158, p. 131-141 (2004) and *NOT* a velocity potential
+! as in Komatitsch and Tromp, Geophysical Journal International, vol. 150, p. 303-318 (2002).
+!
+! This permits acoustic-elastic coupling based on a non-iterative time scheme.
+! Displacement is then:
+!     u = grad(Chi) / rho
+! Velocity is then:
+!     v = grad(Chi_dot) / rho
+! (Chi_dot being the time derivative of Chi)
+! and pressure is:
+!     p = - Chi_dot_dot
+! (Chi_dot_dot being the time second derivative of Chi).
+!
+! The source in an acoustic element is a pressure source.
+!
+! First-order acoustic-acoustic discontinuities are also handled automatically
+! because pressure is continuous at such an interface, therefore Chi_dot_dot
+! is continuous, therefore Chi is also continuous, which is consistent with
+! the spectral-element basis functions and with the assembling process.
+! This is the reason why a simple displacement potential u = grad(Chi) would
+! not work because it would be discontinuous at such an interface and would
+! therefore not be consistent with the basis functions.
+
+
+subroutine compute_forces_acoustic()
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+
+  implicit none
+
+  ! local parameters
+  integer:: iphase
+  logical:: phase_is_inner
+
+  ! enforces free surface (zeroes potentials at free surface)
+  if(.NOT. GPU_MODE) then
+    ! on CPU
+    call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB,ABSORB_INSTEAD_OF_FREE_SURFACE, &
+                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
+                        ibool,free_surface_ijk,free_surface_ispec, &
+                        num_free_surface_faces,ispec_is_acoustic)
+
+    ! adjoint simulations
+    if( SIMULATION_TYPE == 3 ) &
+      call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT,ABSORB_INSTEAD_OF_FREE_SURFACE, &
+                        b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
+                        ibool,free_surface_ijk,free_surface_ispec, &
+                        num_free_surface_faces,ispec_is_acoustic)
+  else
+    ! on GPU
+    call acoustic_enforce_free_surf_cuda(Mesh_pointer,ABSORB_INSTEAD_OF_FREE_SURFACE)
+  endif
+
+  ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions
+  do iphase=1,2
+
+    !first for points on MPI interfaces, thus outer elements
+    if( iphase == 1 ) then
+      phase_is_inner = .false.
+    else
+      phase_is_inner = .true.
+    endif
+
+    ! acoustic pressure term
+    if(.NOT. GPU_MODE) then
+      ! on CPU
+      call compute_forces_acoustic_pot_noDev( iphase, NSPEC_AB,NGLOB_AB, &
+                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_yy,hprime_zz, &
+                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                        rhostore,jacobian,ibool,deltat, &
+                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
+                        ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
+                        num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
+                        phase_ispec_inner_acoustic )
+
+      ! adjoint simulations
+      if( SIMULATION_TYPE == 3 ) &
+        call compute_forces_acoustic_pot_noDev( iphase, NSPEC_ADJOINT,NGLOB_ADJOINT, &
+                        b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_yy,hprime_zz, &
+                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                        rhostore,jacobian,ibool,deltat, &
+                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
+                        ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
+                        num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
+                        phase_ispec_inner_acoustic )
+    else
+      ! on GPU
+      ! includes code for SIMULATION_TYPE==3
+      call compute_forces_acoustic_cuda(Mesh_pointer, iphase, &
+                                        nspec_outer_acoustic, nspec_inner_acoustic)
+    endif
+
+    ! ! Stacey absorbing boundary conditions
+    if(ABSORBING_CONDITIONS) then
+       call compute_stacey_acoustic(NSPEC_AB,NGLOB_AB, &
+                         potential_dot_dot_acoustic,potential_dot_acoustic, &
+                         ibool,ispec_is_inner,phase_is_inner, &
+                         abs_boundary_jacobian2Dw,abs_boundary_ijk,abs_boundary_ispec, &
+                         num_abs_boundary_faces,rhostore,kappastore,ispec_is_acoustic, &
+                         SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it,NGLOB_ADJOINT, &
+                         b_potential_dot_dot_acoustic,b_reclen_potential, &
+                         b_absorb_potential,b_num_abs_boundary_faces, &
+                         GPU_MODE,Mesh_pointer)
+    endif
+
+    ! elastic coupling
+    if(ELASTIC_SIMULATION ) then
+      if( num_coupling_ac_el_faces > 0 ) then
+        if( .NOT. GPU_MODE ) then
+          if( SIMULATION_TYPE == 1 ) then
+            ! forward definition: \bfs=\frac{1}{\rho}\bfnabla\phi
+            call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
+                              ibool,displ,potential_dot_dot_acoustic, &
+                              num_coupling_ac_el_faces, &
+                              coupling_ac_el_ispec,coupling_ac_el_ijk, &
+                              coupling_ac_el_normal, &
+                              coupling_ac_el_jacobian2Dw, &
+                              ispec_is_inner,phase_is_inner)
+          else
+            ! handles adjoint runs coupling between adjoint potential and adjoint elastic wavefield
+            ! adjoint definition: \partial_t^2 \bfs^\dagger=-\frac{1}{\rho}\bfnabla\phi^\dagger
+            call compute_coupling_acoustic_el(NSPEC_AB,NGLOB_AB, &
+                              ibool,-accel_adj_coupling,potential_dot_dot_acoustic, &
+                              num_coupling_ac_el_faces, &
+                              coupling_ac_el_ispec,coupling_ac_el_ijk, &
+                              coupling_ac_el_normal, &
+                              coupling_ac_el_jacobian2Dw, &
+                              ispec_is_inner,phase_is_inner)
+          endif
+          ! adjoint/kernel simulations
+          if( SIMULATION_TYPE == 3 ) &
+            call compute_coupling_acoustic_el(NSPEC_ADJOINT,NGLOB_ADJOINT, &
+                            ibool,b_displ,b_potential_dot_dot_acoustic, &
+                            num_coupling_ac_el_faces, &
+                            coupling_ac_el_ispec,coupling_ac_el_ijk, &
+                            coupling_ac_el_normal, &
+                            coupling_ac_el_jacobian2Dw, &
+                            ispec_is_inner,phase_is_inner)
+
+        else
+          ! on GPU
+          call compute_coupling_ac_el_cuda(Mesh_pointer,phase_is_inner, &
+                                              num_coupling_ac_el_faces)
+        endif ! GPU_MODE
+      endif
+    endif
+
+! poroelastic coupling
+    if(POROELASTIC_SIMULATION )  then
+      if( num_coupling_ac_po_faces > 0 ) then
+        if( SIMULATION_TYPE == 1 ) then
+          call compute_coupling_acoustic_po(NSPEC_AB,NGLOB_AB, &
+                        ibool,displs_poroelastic,displw_poroelastic, &
+                        potential_dot_dot_acoustic, &
+                        num_coupling_ac_po_faces, &
+                        coupling_ac_po_ispec,coupling_ac_po_ijk, &
+                        coupling_ac_po_normal, &
+                        coupling_ac_po_jacobian2Dw, &
+                        ispec_is_inner,phase_is_inner)
+        else
+          stop 'not implemented yet'
+        endif
+        if( SIMULATION_TYPE == 3 ) &
+          stop 'not implemented yet'
+      endif
+    endif
+
+    ! sources
+    call compute_add_sources_acoustic(NSPEC_AB,NGLOB_AB,potential_dot_dot_acoustic, &
+                        ibool,ispec_is_inner,phase_is_inner, &
+                        NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,&
+                        xi_source,eta_source,gamma_source, &
+                        hdur,hdur_gaussian,tshift_src,dt,t0, &
+                        sourcearrays,kappastore,ispec_is_acoustic,&
+                        SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
+                        nrec,islice_selected_rec,ispec_selected_rec, &
+                        nadj_rec_local,adj_sourcearrays,b_potential_dot_dot_acoustic, &
+                        NTSTEP_BETWEEN_READ_ADJSRC, &
+                        GPU_MODE, Mesh_pointer)
+
+    ! assemble all the contributions between slices using MPI
+    if( phase_is_inner .eqv. .false. ) then
+      ! sends potential_dot_dot_acoustic values to corresponding MPI interface neighbors (non-blocking)
+      if(.NOT. GPU_MODE) then
+        call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
+                        buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+                        my_neighbours_ext_mesh, &
+                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+      else
+        ! on GPU
+        call transfer_boun_pot_from_device(NGLOB_AB, Mesh_pointer, &
+                                            potential_dot_dot_acoustic, &
+                                            buffer_send_scalar_ext_mesh, &
+                                            num_interfaces_ext_mesh, &
+                                            max_nibool_interfaces_ext_mesh, &
+                                            nibool_interfaces_ext_mesh, &
+                                            ibool_interfaces_ext_mesh, &
+                                            1) ! <-- 1 == fwd accel
+        call assemble_MPI_scalar_send_cuda(NPROC, &
+                        buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,&
+                        my_neighbours_ext_mesh, &
+                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+      endif
+
+      ! adjoint simulations
+      if( SIMULATION_TYPE == 3 ) then
+        if(.NOT. GPU_MODE) then
+          call assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
+                        b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+                        my_neighbours_ext_mesh, &
+                        b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
+        else
+          ! on GPU
+          call transfer_boun_pot_from_device(NGLOB_AB, Mesh_pointer, &
+                                                  b_potential_dot_dot_acoustic, &
+                                                  b_buffer_send_scalar_ext_mesh,&
+                                                  num_interfaces_ext_mesh, &
+                                                  max_nibool_interfaces_ext_mesh, &
+                                                  nibool_interfaces_ext_mesh, &
+                                                  ibool_interfaces_ext_mesh, &
+                                                  3) ! <-- 3 == adjoint b_accel
+
+          call assemble_MPI_scalar_send_cuda(NPROC, &
+                          b_buffer_send_scalar_ext_mesh,b_buffer_recv_scalar_ext_mesh, &
+                          num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                          nibool_interfaces_ext_mesh,&
+                          my_neighbours_ext_mesh, &
+                          b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
+
+        endif
+      endif
+
+    else
+
+      ! waits for send/receive requests to be completed and assembles values
+      if(.NOT. GPU_MODE) then
+        call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
+                        buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
+                        max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh)
+      else
+        ! on GPU
+        call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,potential_dot_dot_acoustic, &
+                        Mesh_pointer,&
+                        buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
+                        max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
+                        1)
+      endif
+
+      ! adjoint simulations
+      if( SIMULATION_TYPE == 3 ) then
+        if(.NOT. GPU_MODE) then
+          call assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_ADJOINT,b_potential_dot_dot_acoustic, &
+                        b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh,&
+                        max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        b_request_send_scalar_ext_mesh,b_request_recv_scalar_ext_mesh)
+        else
+          ! on GPU
+          call assemble_MPI_scalar_write_cuda(NPROC,NGLOB_AB,b_potential_dot_dot_acoustic, &
+                        Mesh_pointer, &
+                        b_buffer_recv_scalar_ext_mesh,num_interfaces_ext_mesh, &
+                        max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        b_request_send_scalar_ext_mesh,request_recv_scalar_ext_mesh, &
+                        3)
+        endif
+      endif
+    endif !phase_is_inner
+
+  enddo
+
+  if(.NOT. GPU_MODE) then
+    ! divides pressure with mass matrix
+    potential_dot_dot_acoustic(:) = potential_dot_dot_acoustic(:) * rmass_acoustic(:)
+
+    ! adjoint simulations
+    if (SIMULATION_TYPE == 3) &
+      b_potential_dot_dot_acoustic(:) = b_potential_dot_dot_acoustic(:) * rmass_acoustic(:)
+  else
+    ! on GPU
+    call kernel_3_a_acoustic_cuda(Mesh_pointer,NGLOB_AB)
+  endif
+
+! update velocity
+! note: Newmark finite-difference time scheme with acoustic domains:
+! (see e.g. Hughes, 1987; Chaljub et al., 2003)
+!
+! chi(t+delta_t) = chi(t) + delta_t chi_dot(t) + 1/2 delta_t**2 chi_dot_dot(t)
+! chi_dot(t+delta_t) = chi_dot(t) + 1/2 delta_t chi_dot_dot(t) + 1/2 DELTA_T CHI_DOT_DOT( T + DELTA_T )
+! chi_dot_dot(t+delta_t) = 1/M_acoustic( -K_acoustic chi(t+delta) + B_acoustic u(t+delta_t) + f(t+delta_t) )
+!
+! where
+!   chi, chi_dot, chi_dot_dot are acoustic (fluid) potentials ( dotted with respect to time)
+!   u, v, a are displacement,velocity & acceleration
+!   M is mass matrix, K stiffness matrix and B boundary term
+!   f denotes a source term
+!
+! corrector:
+!   updates the chi_dot term which requires chi_dot_dot(t+delta)
+ if( .NOT. GPU_MODE ) then
+    ! corrector
+    potential_dot_acoustic(:) = potential_dot_acoustic(:) + deltatover2*potential_dot_dot_acoustic(:)
+
+    ! adjoint simulations
+    if (SIMULATION_TYPE == 3) &
+      b_potential_dot_acoustic(:) = b_potential_dot_acoustic(:) + b_deltatover2*b_potential_dot_dot_acoustic(:)
+  else
+    ! on GPU
+    call kernel_3_b_acoustic_cuda(Mesh_pointer,NGLOB_AB,deltatover2,b_deltatover2)
+  endif
+
+! enforces free surface (zeroes potentials at free surface)
+  if(.NOT. GPU_MODE) then
+    ! on CPU
+    call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB,ABSORB_INSTEAD_OF_FREE_SURFACE, &
+                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
+                        ibool,free_surface_ijk,free_surface_ispec, &
+                        num_free_surface_faces,ispec_is_acoustic)
+
+    if( SIMULATION_TYPE /= 1 ) then
+      potential_acoustic_adj_coupling(:) = potential_acoustic(:) &
+                            + deltat * potential_dot_acoustic(:) &
+                            + deltatsqover2 * potential_dot_dot_acoustic(:)
+    endif
+
+    ! adjoint simulations
+    if (SIMULATION_TYPE == 3) &
+      call acoustic_enforce_free_surface(NSPEC_AB,NGLOB_ADJOINT,ABSORB_INSTEAD_OF_FREE_SURFACE, &
+                        b_potential_acoustic,b_potential_dot_acoustic,b_potential_dot_dot_acoustic, &
+                        ibool,free_surface_ijk,free_surface_ispec, &
+                        num_free_surface_faces,ispec_is_acoustic)
+  else
+    ! on GPU
+    call acoustic_enforce_free_surf_cuda(Mesh_pointer,ABSORB_INSTEAD_OF_FREE_SURFACE)
+  endif
+
+end subroutine compute_forces_acoustic
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+subroutine acoustic_enforce_free_surface(NSPEC_AB,NGLOB_AB,ABSORB_INSTEAD_OF_FREE_SURFACE, &
+                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
+                        ibool,free_surface_ijk,free_surface_ispec, &
+                        num_free_surface_faces,ispec_is_acoustic)
+  implicit none
+  include 'constants.h'
+
+  integer :: NSPEC_AB,NGLOB_AB
+  logical :: ABSORB_INSTEAD_OF_FREE_SURFACE
+
+! acoustic potentials
+  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: &
+        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+
+! free surface
+  integer :: num_free_surface_faces
+  integer :: free_surface_ijk(3,NGLLSQUARE,num_free_surface_faces)
+  integer :: free_surface_ispec(num_free_surface_faces)
+
+  logical, dimension(NSPEC_AB) :: ispec_is_acoustic
+
+! local parameters
+  integer :: iface,igll,i,j,k,ispec,iglob
+
+  ! checks if free surface became an absorbing boundary
+  if( ABSORB_INSTEAD_OF_FREE_SURFACE ) return
+
+! enforce potentials to be zero at surface
+  do iface = 1, num_free_surface_faces
+
+    ispec = free_surface_ispec(iface)
+
+    if( ispec_is_acoustic(ispec) ) then
+
+      do igll = 1, NGLLSQUARE
+        i = free_surface_ijk(1,igll,iface)
+        j = free_surface_ijk(2,igll,iface)
+        k = free_surface_ijk(3,igll,iface)
+        iglob = ibool(i,j,k,ispec)
+
+        ! sets potentials to zero
+        potential_acoustic(iglob)         = 0._CUSTOM_REAL
+        potential_dot_acoustic(iglob)     = 0._CUSTOM_REAL
+        potential_dot_dot_acoustic(iglob) = 0._CUSTOM_REAL
+      enddo
+    endif
+
+  enddo
+
+end subroutine acoustic_enforce_free_surface
+

Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_pot.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_pot.f90	2013-01-16 17:48:05 UTC (rev 21238)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_pot.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -1,394 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 1
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-!                             July 2012
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! for acoustic solver
-
-  subroutine compute_forces_acoustic_pot( iphase, NSPEC_AB,NGLOB_AB, &
-                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_yy,hprime_zz, &
-                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        rhostore,jacobian,ibool,deltat, &
-                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
-                        ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
-                        num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
-                        phase_ispec_inner_acoustic )
-
-! computes forces for acoustic elements
-!
-! note that pressure is defined as:
-!     p = - Chi_dot_dot
-!
-  use specfem_par,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,TINYVAL_SNGL,ABSORB_USE_PML,ABSORBING_CONDITIONS,PML_CONDITIONS
-  use pml_par
-
-  implicit none
-
-  !include "constants.h"
-  integer :: NSPEC_AB,NGLOB_AB
-
-  ! acoustic potentials
-  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: &
-        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic
-
-! time step
-  real(kind=CUSTOM_REAL) :: deltat
-
-  ! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        rhostore,jacobian
-
-  ! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
-  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-! communication overlap
-!  logical, dimension(NSPEC_AB) :: ispec_is_inner
-!  logical :: phase_is_inner
-
-!  logical, dimension(NSPEC_AB) :: ispec_is_acoustic
-
-  integer :: iphase
-  integer :: num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic
-  integer, dimension(num_phase_ispec_acoustic,2) :: phase_ispec_inner_acoustic
-
-! C-PML absorbing boundary conditions
-  integer :: nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP
-  integer, dimension(nspec2D_xmin) :: ibelm_xmin
-  integer, dimension(nspec2D_xmax) :: ibelm_xmax
-  integer, dimension(nspec2D_ymin) :: ibelm_ymin
-  integer, dimension(nspec2D_ymax) :: ibelm_ymax
-  integer, dimension(NSPEC2D_BOTTOM) :: ibelm_bottom
-  integer, dimension(NSPEC2D_TOP) :: ibelm_top
-
-! local variables
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
-       tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
-
-  real(kind=CUSTOM_REAL) :: sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
-  real(kind=CUSTOM_REAL) :: hp1,hp2,hp3
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: chi_elem
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: temp1,temp2,temp3
-  real(kind=CUSTOM_REAL) :: temp1l,temp2l,temp3l
-  real(kind=CUSTOM_REAL) :: temp1l_new,temp2l_new,temp3l_new
-
-  real(kind=CUSTOM_REAL) :: lambdal,mul,lambdalplus2mul
-
-  real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-  real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
-  real(kind=CUSTOM_REAL) :: dpotentialdxl_new,dpotentialdyl_new,dpotentialdzl_new
-  real(kind=CUSTOM_REAL) :: rho_invl
-
-  integer :: ispec,ispec2D,iglob,i,j,k,l,ispec_p,num_elements
-
-  ! local C-PML absorbing boundary conditions parameters
-  integer :: ispec_CPML
-
-  if( iphase == 1 ) then
-    num_elements = nspec_outer_acoustic
-  else
-    num_elements = nspec_inner_acoustic
-  endif
-
-  ! loop over spectral elements
-  do ispec_p = 1,num_elements
-
-    !if( (ispec_is_inner(ispec) .eqv. phase_is_inner) ) then
-
-    ispec = phase_ispec_inner_acoustic(ispec_p,iphase)
-
-    !if( ispec_is_acoustic(ispec) ) then
-
-    ! gets values for element
-    do k=1,NGLLZ
-      do j=1,NGLLY
-        do i=1,NGLLX
-          chi_elem(i,j,k) = potential_acoustic(ibool(i,j,k,ispec))
-        enddo
-      enddo
-    enddo
-
-    ! would check if anything to do, but might lower accuracy of computation
-    !if( maxval( abs( chi_elem ) ) < TINYVAL_SNGL ) cycle
-
-    do k=1,NGLLZ
-      do j=1,NGLLY
-        do i=1,NGLLX
-
-          ! derivative along x, y, z
-          ! first double loop over GLL points to compute and store gradients
-          ! we can merge the loops because NGLLX == NGLLY == NGLLZ
-          temp1l = 0._CUSTOM_REAL
-          temp2l = 0._CUSTOM_REAL
-          temp3l = 0._CUSTOM_REAL
-
-          do l = 1,NGLLX
-            temp1l = temp1l + chi_elem(l,j,k)*hprime_xx(i,l)
-            temp2l = temp2l + chi_elem(i,l,k)*hprime_yy(j,l)
-            temp3l = temp3l + chi_elem(i,j,l)*hprime_zz(k,l)
-          enddo
-
-          if( PML_CONDITIONS .and. CPML_mask_ibool(ispec) ) then
-             temp1l_new = temp1l
-             temp2l_new = temp2l
-             temp3l_new = temp3l
-
-             do l=1,NGLLX
-                hp1 = hprime_xx(l,i)
-                iglob = ibool(l,j,k,ispec)
-                temp1l_new = temp1l_new + deltat*potential_dot_acoustic(iglob)*hp1
-
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
-
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
-                hp2 = hprime_yy(l,j)
-                iglob = ibool(i,l,k,ispec)
-                temp2l_new = temp2l_new + deltat*potential_dot_acoustic(iglob)*hp2
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
-
-!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
-                hp3 = hprime_zz(l,k)
-                iglob = ibool(i,j,l,ispec)
-                temp3l_new = temp3l_new + deltat*potential_dot_acoustic(iglob)*hp3
-             enddo
-          endif
-
-         ! get derivatives of potential with respect to x, y and z
-          xixl = xix(i,j,k,ispec)
-          xiyl = xiy(i,j,k,ispec)
-          xizl = xiz(i,j,k,ispec)
-          etaxl = etax(i,j,k,ispec)
-          etayl = etay(i,j,k,ispec)
-          etazl = etaz(i,j,k,ispec)
-          gammaxl = gammax(i,j,k,ispec)
-          gammayl = gammay(i,j,k,ispec)
-          gammazl = gammaz(i,j,k,ispec)
-          jacobianl = jacobian(i,j,k,ispec)
-
-          ! derivatives of potential
-          dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l
-          dpotentialdyl = xiyl*temp1l + etayl*temp2l + gammayl*temp3l
-          dpotentialdzl = xizl*temp1l + etazl*temp2l + gammazl*temp3l
-
-          ! stores derivatives of ux, uy and uz with respect to x, y and z
-          if( PML_CONDITIONS .and. CPML_mask_ibool(ispec) ) then
-             ispec_CPML = spec_to_CPML(ispec)
-
-             PML_dpotential_dxl(i,j,k,ispec_CPML) = dpotentialdxl
-             PML_dpotential_dyl(i,j,k,ispec_CPML) = dpotentialdyl
-             PML_dpotential_dzl(i,j,k,ispec_CPML) = dpotentialdzl
-
-             dpotentialdxl_new = xixl*temp1l_new + etaxl*temp2l_new + gammaxl*temp3l_new
-             dpotentialdyl_new = xiyl*temp1l_new + etayl*temp2l_new + gammayl*temp3l_new
-             dpotentialdzl_new = xizl*temp1l_new + etazl*temp2l_new + gammazl*temp3l_new
-
-             PML_dpotential_dxl_new(i,j,k,ispec_CPML) = dpotentialdxl_new
-             PML_dpotential_dyl_new(i,j,k,ispec_CPML) = dpotentialdyl_new
-             PML_dpotential_dzl_new(i,j,k,ispec_CPML) = dpotentialdzl_new
-          endif
-
-          ! density (reciproc)
-          rho_invl = 1.0_CUSTOM_REAL / rhostore(i,j,k,ispec)
-
-          ! for acoustic medium
-          ! also add GLL integration weights
-          temp1(i,j,k) = rho_invl * wgllwgll_yz(j,k) * jacobianl* &
-                        (xixl*dpotentialdxl + xiyl*dpotentialdyl + xizl*dpotentialdzl)
-          temp2(i,j,k) = rho_invl * wgllwgll_xz(i,k) * jacobianl* &
-                        (etaxl*dpotentialdxl + etayl*dpotentialdyl + etazl*dpotentialdzl)
-          temp3(i,j,k) = rho_invl * wgllwgll_xy(i,j) * jacobianl* &
-                        (gammaxl*dpotentialdxl + gammayl*dpotentialdyl + gammazl*dpotentialdzl)
-        enddo
-      enddo
-    enddo
-
-    if( PML_CONDITIONS .and. CPML_mask_ibool(ispec) ) then
-       ! sets C-PML elastic memory variables to compute stress sigma and form dot product with test vector
-       call pml_set_memory_variables(ispec,ispec_CPML,deltat,jacobianl,tempx1,tempy1,tempz1,tempx2,tempy2,tempz2, &
-                                    tempx3,tempy3,tempz3,sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz, &
-                                    sigma_yx,sigma_zx,sigma_zy,lambdal,mul,lambdalplus2mul,xixl,xiyl,xizl, &
-                                    etaxl,etayl,etazl,gammaxl,gammayl,gammazl)
-
-       ! calculates contribution from each C-PML element to update acceleration
-       call pml_set_accel_contribution(ispec,ispec_CPML,deltat,jacobianl,accel_elastic_CPML)
-    endif
-
-    ! second double-loop over GLL to compute all the terms
-    do k = 1,NGLLZ
-      do j = 1,NGLLZ
-        do i = 1,NGLLX
-
-          ! along x,y,z direction
-          ! and assemble the contributions
-          !!! can merge these loops because NGLLX = NGLLY = NGLLZ
-          temp1l = 0._CUSTOM_REAL
-          temp2l = 0._CUSTOM_REAL
-          temp3l = 0._CUSTOM_REAL
-
-          do l=1,NGLLX
-            temp1l = temp1l + temp1(l,j,k) * hprimewgll_xx(l,i)
-            temp2l = temp2l + temp2(i,l,k) * hprimewgll_yy(l,j)
-            temp3l = temp3l + temp3(i,j,l) * hprimewgll_zz(l,k)
-          enddo
-
-          ! sum contributions from each element to the global values
-          iglob = ibool(i,j,k,ispec)
-
-          potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) - ( temp1l + temp2l + temp3l )
-
-          ! updates potential_dot_dot_acoustic with contribution from each C-PML element
-          if( PML_CONDITIONS .and. CPML_mask_ibool(ispec) ) then
-             potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) - &
-                  potential_dot_dot_acoustic_CPML(i,j,k,ispec_CPML)
-          endif
-
-        enddo
-      enddo
-    enddo
-
-!      endif ! end of test if acoustic element
-!    endif ! ispec_is_inner
-
-  enddo ! end of loop over all spectral elements
-
-  ! C-PML boundary 
-  if( PML_CONDITIONS ) then
-     ! xmin
-     do ispec2D=1,nspec2D_xmin
-        ispec = ibelm_xmin(ispec2D)
-
-        i = 1
-
-        do k=1,NGLLZ
-           do j=1,NGLLY
-              iglob = ibool(i,j,k,ispec)
-
-              potential_dot_dot_acoustic(iglob) = 0.d0
-              potential_dot_acoustic(iglob) = 0.d0
-              potential_acoustic(iglob) = 0.d0
-           enddo
-        enddo
-     enddo
-
-     ! xmax
-     do ispec2D=1,nspec2D_xmax
-        ispec = ibelm_xmax(ispec2D)
-
-        i = NGLLX
-
-        do k=1,NGLLZ
-           do j=1,NGLLY
-              iglob = ibool(i,j,k,ispec)
-
-              potential_dot_dot_acoustic(iglob) = 0.d0
-              potential_dot_acoustic(iglob) = 0.d0
-              potential_acoustic(iglob) = 0.d0
-           enddo
-        enddo
-     enddo
-
-     ! ymin
-     do ispec2D=1,nspec2D_ymin
-        ispec = ibelm_ymin(ispec2D)
-
-        j = 1
-
-        do k=1,NGLLZ
-           do i=1,NGLLX
-              iglob = ibool(i,j,k,ispec)
-
-              potential_dot_dot_acoustic(iglob) = 0.d0
-              potential_dot_acoustic(iglob) = 0.d0
-              potential_acoustic(iglob) = 0.d0
-           enddo
-        enddo
-     enddo
-
-     ! ymax
-     do ispec2D=1,nspec2D_ymax
-        ispec = ibelm_ymax(ispec2D)
-
-        j = NGLLY
-
-        do k=1,NGLLZ
-           do i=1,NGLLX
-              iglob = ibool(i,j,k,ispec)
-
-              potential_dot_dot_acoustic(iglob) = 0.d0
-              potential_dot_acoustic(iglob) = 0.d0
-              potential_acoustic(iglob) = 0.d0
-           enddo
-        enddo
-     enddo
-
-     ! bottom (zmin)
-     do ispec2D=1,NSPEC2D_BOTTOM
-        ispec = ibelm_bottom(ispec2D)
-
-        k = 1
-
-        do j=1,NGLLY
-           do i=1,NGLLX
-              iglob = ibool(i,j,k,ispec)
-
-              potential_dot_dot_acoustic(iglob) = 0.d0
-              potential_dot_acoustic(iglob) = 0.d0
-              potential_acoustic(iglob) = 0.d0
-           enddo
-        enddo
-     enddo
-
-     ! top (zmax)
-     do ispec2D=1,NSPEC2D_BOTTOM
-        ispec = ibelm_top(ispec2D)
-
-        k = NGLLZ
-
-        do j=1,NGLLY
-           do i=1,NGLLX
-              iglob = ibool(i,j,k,ispec)
-
-              potential_dot_dot_acoustic(iglob) = 0.d0
-              potential_dot_acoustic(iglob) = 0.d0
-              potential_acoustic(iglob) = 0.d0
-           enddo
-        enddo
-     enddo
-
-  endif ! if( PML_CONDITIONS )
-
-  end subroutine compute_forces_acoustic_pot
-

Copied: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_pot_noDev.f90 (from rev 21237, seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_pot.f90)
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_pot_noDev.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_acoustic_pot_noDev.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -0,0 +1,394 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 1
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+!                             July 2012
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! for acoustic solver
+
+  subroutine compute_forces_acoustic_pot_noDev( iphase, NSPEC_AB,NGLOB_AB, &
+                        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic, &
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_yy,hprime_zz, &
+                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                        rhostore,jacobian,ibool,deltat, &
+                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
+                        ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
+                        num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic,&
+                        phase_ispec_inner_acoustic )
+
+! computes forces for acoustic elements
+!
+! note that pressure is defined as:
+!     p = - Chi_dot_dot
+!
+  use specfem_par,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,TINYVAL_SNGL,ABSORB_USE_PML,ABSORBING_CONDITIONS,PML_CONDITIONS
+  use pml_par
+
+  implicit none
+
+  !include "constants.h"
+  integer :: NSPEC_AB,NGLOB_AB
+
+  ! acoustic potentials
+  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: &
+        potential_acoustic,potential_dot_acoustic,potential_dot_dot_acoustic
+
+! time step
+  real(kind=CUSTOM_REAL) :: deltat
+
+  ! arrays with mesh parameters per slice
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+        rhostore,jacobian
+
+  ! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+! communication overlap
+!  logical, dimension(NSPEC_AB) :: ispec_is_inner
+!  logical :: phase_is_inner
+
+!  logical, dimension(NSPEC_AB) :: ispec_is_acoustic
+
+  integer :: iphase
+  integer :: num_phase_ispec_acoustic,nspec_inner_acoustic,nspec_outer_acoustic
+  integer, dimension(num_phase_ispec_acoustic,2) :: phase_ispec_inner_acoustic
+
+! C-PML absorbing boundary conditions
+  integer :: nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP
+  integer, dimension(nspec2D_xmin) :: ibelm_xmin
+  integer, dimension(nspec2D_xmax) :: ibelm_xmax
+  integer, dimension(nspec2D_ymin) :: ibelm_ymin
+  integer, dimension(nspec2D_ymax) :: ibelm_ymax
+  integer, dimension(NSPEC2D_BOTTOM) :: ibelm_bottom
+  integer, dimension(NSPEC2D_TOP) :: ibelm_top
+
+! local variables
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+       tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+
+  real(kind=CUSTOM_REAL) :: sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
+  real(kind=CUSTOM_REAL) :: hp1,hp2,hp3
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: chi_elem
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: temp1,temp2,temp3
+  real(kind=CUSTOM_REAL) :: temp1l,temp2l,temp3l
+  real(kind=CUSTOM_REAL) :: temp1l_new,temp2l_new,temp3l_new
+
+  real(kind=CUSTOM_REAL) :: lambdal,mul,lambdalplus2mul
+
+  real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
+  real(kind=CUSTOM_REAL) :: dpotentialdxl_new,dpotentialdyl_new,dpotentialdzl_new
+  real(kind=CUSTOM_REAL) :: rho_invl
+
+  integer :: ispec,ispec2D,iglob,i,j,k,l,ispec_p,num_elements
+
+  ! local C-PML absorbing boundary conditions parameters
+  integer :: ispec_CPML
+
+  if( iphase == 1 ) then
+    num_elements = nspec_outer_acoustic
+  else
+    num_elements = nspec_inner_acoustic
+  endif
+
+  ! loop over spectral elements
+  do ispec_p = 1,num_elements
+
+    !if( (ispec_is_inner(ispec) .eqv. phase_is_inner) ) then
+
+    ispec = phase_ispec_inner_acoustic(ispec_p,iphase)
+
+    !if( ispec_is_acoustic(ispec) ) then
+
+    ! gets values for element
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+          chi_elem(i,j,k) = potential_acoustic(ibool(i,j,k,ispec))
+        enddo
+      enddo
+    enddo
+
+    ! would check if anything to do, but might lower accuracy of computation
+    !if( maxval( abs( chi_elem ) ) < TINYVAL_SNGL ) cycle
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          ! derivative along x, y, z
+          ! first double loop over GLL points to compute and store gradients
+          ! we can merge the loops because NGLLX == NGLLY == NGLLZ
+          temp1l = 0._CUSTOM_REAL
+          temp2l = 0._CUSTOM_REAL
+          temp3l = 0._CUSTOM_REAL
+
+          do l = 1,NGLLX
+            temp1l = temp1l + chi_elem(l,j,k)*hprime_xx(i,l)
+            temp2l = temp2l + chi_elem(i,l,k)*hprime_yy(j,l)
+            temp3l = temp3l + chi_elem(i,j,l)*hprime_zz(k,l)
+          enddo
+
+          if( PML_CONDITIONS .and. CPML_mask_ibool(ispec) ) then
+             temp1l_new = temp1l
+             temp2l_new = temp2l
+             temp3l_new = temp3l
+
+             do l=1,NGLLX
+                hp1 = hprime_xx(l,i)
+                iglob = ibool(l,j,k,ispec)
+                temp1l_new = temp1l_new + deltat*potential_dot_acoustic(iglob)*hp1
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+                hp2 = hprime_yy(l,j)
+                iglob = ibool(i,l,k,ispec)
+                temp2l_new = temp2l_new + deltat*potential_dot_acoustic(iglob)*hp2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+                hp3 = hprime_zz(l,k)
+                iglob = ibool(i,j,l,ispec)
+                temp3l_new = temp3l_new + deltat*potential_dot_acoustic(iglob)*hp3
+             enddo
+          endif
+
+         ! get derivatives of potential with respect to x, y and z
+          xixl = xix(i,j,k,ispec)
+          xiyl = xiy(i,j,k,ispec)
+          xizl = xiz(i,j,k,ispec)
+          etaxl = etax(i,j,k,ispec)
+          etayl = etay(i,j,k,ispec)
+          etazl = etaz(i,j,k,ispec)
+          gammaxl = gammax(i,j,k,ispec)
+          gammayl = gammay(i,j,k,ispec)
+          gammazl = gammaz(i,j,k,ispec)
+          jacobianl = jacobian(i,j,k,ispec)
+
+          ! derivatives of potential
+          dpotentialdxl = xixl*temp1l + etaxl*temp2l + gammaxl*temp3l
+          dpotentialdyl = xiyl*temp1l + etayl*temp2l + gammayl*temp3l
+          dpotentialdzl = xizl*temp1l + etazl*temp2l + gammazl*temp3l
+
+          ! stores derivatives of ux, uy and uz with respect to x, y and z
+          if( PML_CONDITIONS .and. CPML_mask_ibool(ispec) ) then
+             ispec_CPML = spec_to_CPML(ispec)
+
+             PML_dpotential_dxl(i,j,k,ispec_CPML) = dpotentialdxl
+             PML_dpotential_dyl(i,j,k,ispec_CPML) = dpotentialdyl
+             PML_dpotential_dzl(i,j,k,ispec_CPML) = dpotentialdzl
+
+             dpotentialdxl_new = xixl*temp1l_new + etaxl*temp2l_new + gammaxl*temp3l_new
+             dpotentialdyl_new = xiyl*temp1l_new + etayl*temp2l_new + gammayl*temp3l_new
+             dpotentialdzl_new = xizl*temp1l_new + etazl*temp2l_new + gammazl*temp3l_new
+
+             PML_dpotential_dxl_new(i,j,k,ispec_CPML) = dpotentialdxl_new
+             PML_dpotential_dyl_new(i,j,k,ispec_CPML) = dpotentialdyl_new
+             PML_dpotential_dzl_new(i,j,k,ispec_CPML) = dpotentialdzl_new
+          endif
+
+          ! density (reciproc)
+          rho_invl = 1.0_CUSTOM_REAL / rhostore(i,j,k,ispec)
+
+          ! for acoustic medium
+          ! also add GLL integration weights
+          temp1(i,j,k) = rho_invl * wgllwgll_yz(j,k) * jacobianl* &
+                        (xixl*dpotentialdxl + xiyl*dpotentialdyl + xizl*dpotentialdzl)
+          temp2(i,j,k) = rho_invl * wgllwgll_xz(i,k) * jacobianl* &
+                        (etaxl*dpotentialdxl + etayl*dpotentialdyl + etazl*dpotentialdzl)
+          temp3(i,j,k) = rho_invl * wgllwgll_xy(i,j) * jacobianl* &
+                        (gammaxl*dpotentialdxl + gammayl*dpotentialdyl + gammazl*dpotentialdzl)
+        enddo
+      enddo
+    enddo
+
+    if( PML_CONDITIONS .and. CPML_mask_ibool(ispec) ) then
+       ! sets C-PML elastic memory variables to compute stress sigma and form dot product with test vector
+       call pml_set_memory_variables(ispec,ispec_CPML,deltat,jacobianl,tempx1,tempy1,tempz1,tempx2,tempy2,tempz2, &
+                                    tempx3,tempy3,tempz3,sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz, &
+                                    sigma_yx,sigma_zx,sigma_zy,lambdal,mul,lambdalplus2mul,xixl,xiyl,xizl, &
+                                    etaxl,etayl,etazl,gammaxl,gammayl,gammazl)
+
+       ! calculates contribution from each C-PML element to update acceleration
+       call pml_set_accel_contribution(ispec,ispec_CPML,deltat,jacobianl,accel_elastic_CPML)
+    endif
+
+    ! second double-loop over GLL to compute all the terms
+    do k = 1,NGLLZ
+      do j = 1,NGLLZ
+        do i = 1,NGLLX
+
+          ! along x,y,z direction
+          ! and assemble the contributions
+          !!! can merge these loops because NGLLX = NGLLY = NGLLZ
+          temp1l = 0._CUSTOM_REAL
+          temp2l = 0._CUSTOM_REAL
+          temp3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            temp1l = temp1l + temp1(l,j,k) * hprimewgll_xx(l,i)
+            temp2l = temp2l + temp2(i,l,k) * hprimewgll_yy(l,j)
+            temp3l = temp3l + temp3(i,j,l) * hprimewgll_zz(l,k)
+          enddo
+
+          ! sum contributions from each element to the global values
+          iglob = ibool(i,j,k,ispec)
+
+          potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) - ( temp1l + temp2l + temp3l )
+
+          ! updates potential_dot_dot_acoustic with contribution from each C-PML element
+          if( PML_CONDITIONS .and. CPML_mask_ibool(ispec) ) then
+             potential_dot_dot_acoustic(iglob) = potential_dot_dot_acoustic(iglob) - &
+                  potential_dot_dot_acoustic_CPML(i,j,k,ispec_CPML)
+          endif
+
+        enddo
+      enddo
+    enddo
+
+!      endif ! end of test if acoustic element
+!    endif ! ispec_is_inner
+
+  enddo ! end of loop over all spectral elements
+
+  ! C-PML boundary 
+  if( PML_CONDITIONS ) then
+     ! xmin
+     do ispec2D=1,nspec2D_xmin
+        ispec = ibelm_xmin(ispec2D)
+
+        i = 1
+
+        do k=1,NGLLZ
+           do j=1,NGLLY
+              iglob = ibool(i,j,k,ispec)
+
+              potential_dot_dot_acoustic(iglob) = 0.d0
+              potential_dot_acoustic(iglob) = 0.d0
+              potential_acoustic(iglob) = 0.d0
+           enddo
+        enddo
+     enddo
+
+     ! xmax
+     do ispec2D=1,nspec2D_xmax
+        ispec = ibelm_xmax(ispec2D)
+
+        i = NGLLX
+
+        do k=1,NGLLZ
+           do j=1,NGLLY
+              iglob = ibool(i,j,k,ispec)
+
+              potential_dot_dot_acoustic(iglob) = 0.d0
+              potential_dot_acoustic(iglob) = 0.d0
+              potential_acoustic(iglob) = 0.d0
+           enddo
+        enddo
+     enddo
+
+     ! ymin
+     do ispec2D=1,nspec2D_ymin
+        ispec = ibelm_ymin(ispec2D)
+
+        j = 1
+
+        do k=1,NGLLZ
+           do i=1,NGLLX
+              iglob = ibool(i,j,k,ispec)
+
+              potential_dot_dot_acoustic(iglob) = 0.d0
+              potential_dot_acoustic(iglob) = 0.d0
+              potential_acoustic(iglob) = 0.d0
+           enddo
+        enddo
+     enddo
+
+     ! ymax
+     do ispec2D=1,nspec2D_ymax
+        ispec = ibelm_ymax(ispec2D)
+
+        j = NGLLY
+
+        do k=1,NGLLZ
+           do i=1,NGLLX
+              iglob = ibool(i,j,k,ispec)
+
+              potential_dot_dot_acoustic(iglob) = 0.d0
+              potential_dot_acoustic(iglob) = 0.d0
+              potential_acoustic(iglob) = 0.d0
+           enddo
+        enddo
+     enddo
+
+     ! bottom (zmin)
+     do ispec2D=1,NSPEC2D_BOTTOM
+        ispec = ibelm_bottom(ispec2D)
+
+        k = 1
+
+        do j=1,NGLLY
+           do i=1,NGLLX
+              iglob = ibool(i,j,k,ispec)
+
+              potential_dot_dot_acoustic(iglob) = 0.d0
+              potential_dot_acoustic(iglob) = 0.d0
+              potential_acoustic(iglob) = 0.d0
+           enddo
+        enddo
+     enddo
+
+     ! top (zmax)
+     do ispec2D=1,NSPEC2D_BOTTOM
+        ispec = ibelm_top(ispec2D)
+
+        k = NGLLZ
+
+        do j=1,NGLLY
+           do i=1,NGLLX
+              iglob = ibool(i,j,k,ispec)
+
+              potential_dot_dot_acoustic(iglob) = 0.d0
+              potential_dot_acoustic(iglob) = 0.d0
+              potential_acoustic(iglob) = 0.d0
+           enddo
+        enddo
+     enddo
+
+  endif ! if( PML_CONDITIONS )
+
+  end subroutine compute_forces_acoustic_pot_noDev
+

Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90	2013-01-16 17:48:05 UTC (rev 21238)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -1,553 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 1
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-!                             July 2012
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! elastic solver
-
-subroutine compute_forces_elastic()
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-  use fault_solver_dynamic, only : bc_dynflt_set3d_all,SIMULATION_TYPE_DYN
-  use fault_solver_kinematic, only : bc_kinflt_set_all,SIMULATION_TYPE_KIN
-
-  implicit none
-
-  integer:: iphase
-  logical:: phase_is_inner
-
-! distinguishes two runs: for points on MPI interfaces, and points within the partitions
-  do iphase=1,2
-
-    !first for points on MPI interfaces
-    if( iphase == 1 ) then
-      phase_is_inner = .false.
-    else
-      phase_is_inner = .true.
-    endif
-
-
-! elastic term
-    if( .NOT. GPU_MODE ) then
-      if(USE_DEVILLE_PRODUCTS) then
-        ! uses Deville (2002) optimizations
-        call compute_forces_elastic_Dev_sim1(iphase)
-
-        ! adjoint simulations: backward/reconstructed wavefield
-        if( SIMULATION_TYPE == 3 ) &
-          call compute_forces_elastic_Dev_sim3(iphase)
-
-      else
-        ! no optimizations used
-        call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_yy,hprime_zz, &
-                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION,deltat,PML_CONDITIONS, &
-                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
-                        ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
-                        one_minus_sum_beta,factor_common, &
-                        alphaval,betaval,gammaval,&
-                        NSPEC_ATTENUATION_AB, &
-                        R_xx,R_yy,R_xy,R_xz,R_yz, &
-                        epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                        epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store, &
-                        c22store,c23store,c24store,c25store,c26store,c33store, &
-                        c34store,c35store,c36store,c44store,c45store,c46store, &
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
-                        is_moho_top,is_moho_bot, &
-                        dsdx_top,dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic, &
-                        phase_ispec_inner_elastic,ispec_is_elastic )
-
-        ! adjoint simulations: backward/reconstructed wavefield
-        if( SIMULATION_TYPE == 3 ) &
-          call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB, &
-                        b_displ,b_veloc,b_accel, &
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_yy,hprime_zz, &
-                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                        kappastore,mustore,jacobian,ibool, &
-                        ATTENUATION,deltat,PML_CONDITIONS, &
-                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
-                        ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
-                        one_minus_sum_beta,factor_common, &
-                        b_alphaval,b_betaval,b_gammaval, &
-                        NSPEC_ATTENUATION_AB, &
-                        b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                        b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                        b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store, &
-                        c22store,c23store,c24store,c25store,c26store,c33store, &
-                        c34store,c35store,c36store,c44store,c45store,c46store, &
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
-                        is_moho_top,is_moho_bot, &
-                        b_dsdx_top,b_dsdx_bot, &
-                        ispec2D_moho_top,ispec2D_moho_bot, &
-                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic, &
-                        phase_ispec_inner_elastic  )
-
-      endif
-
-    else
-      ! on GPU
-      ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
-      call compute_forces_elastic_cuda(Mesh_pointer, iphase, deltat, &
-                                      nspec_outer_elastic, &
-                                      nspec_inner_elastic, &
-                                      COMPUTE_AND_STORE_STRAIN,ATTENUATION,ANISOTROPY)
-
-      if(phase_is_inner .eqv. .true.) then
-         ! while Inner elements compute "Kernel_2", we wait for MPI to
-         ! finish and transfer the boundary terms to the device
-         ! asynchronously
-
-         !daniel: todo - this avoids calling the fortran vector send from CUDA routine
-         ! wait for asynchronous copy to finish
-         call sync_copy_from_device(Mesh_pointer,iphase,buffer_send_vector_ext_mesh)
-         ! sends mpi buffers
-         call assemble_MPI_vector_send_cuda(NPROC, &
-                  buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
-                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                  nibool_interfaces_ext_mesh,&
-                  my_neighbours_ext_mesh, &
-                  request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
-
-         ! transfers mpi buffers onto GPU
-         call transfer_boundary_to_device(NPROC,Mesh_pointer,buffer_recv_vector_ext_mesh, &
-                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                  request_recv_vector_ext_mesh)
-      endif ! inner elements
-
-   endif ! GPU_MODE
-
-
-! adds elastic absorbing boundary term to acceleration (Stacey conditions)
-    if( ABSORBING_CONDITIONS ) then
-       call compute_stacey_elastic(NSPEC_AB,NGLOB_AB,accel, &
-                        ibool,ispec_is_inner,phase_is_inner, &
-                        abs_boundary_normal,abs_boundary_jacobian2Dw, &
-                        abs_boundary_ijk,abs_boundary_ispec, &
-                        num_abs_boundary_faces, &
-                        veloc,rho_vp,rho_vs, &
-                        ispec_is_elastic,SIMULATION_TYPE,SAVE_FORWARD, &
-                        NSTEP,it,NGLOB_ADJOINT,b_accel, &
-                        b_num_abs_boundary_faces,b_reclen_field,b_absorb_field,&
-                        GPU_MODE,Mesh_pointer)
-    endif
-
-
-! acoustic coupling
-    if( ACOUSTIC_SIMULATION ) then
-      if( num_coupling_ac_el_faces > 0 ) then
-        if( .NOT. GPU_MODE ) then
-          if( SIMULATION_TYPE == 1 ) then
-            ! forward definition: pressure=-potential_dot_dot
-            call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
-                        ibool,accel,potential_dot_dot_acoustic, &
-                        num_coupling_ac_el_faces, &
-                        coupling_ac_el_ispec,coupling_ac_el_ijk, &
-                        coupling_ac_el_normal, &
-                        coupling_ac_el_jacobian2Dw, &
-                        ispec_is_inner,phase_is_inner)
-          else
-            ! handles adjoint runs coupling between adjoint potential and adjoint elastic wavefield
-            ! adoint definition: pressure^\dagger=potential^\dagger
-            call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
-                              ibool,accel,-potential_acoustic_adj_coupling, &
-                              num_coupling_ac_el_faces, &
-                              coupling_ac_el_ispec,coupling_ac_el_ijk, &
-                              coupling_ac_el_normal, &
-                              coupling_ac_el_jacobian2Dw, &
-                              ispec_is_inner,phase_is_inner)
-          endif
-
-        ! adjoint simulations
-        if( SIMULATION_TYPE == 3 ) &
-          call compute_coupling_elastic_ac(NSPEC_ADJOINT,NGLOB_ADJOINT, &
-                        ibool,b_accel,b_potential_dot_dot_acoustic, &
-                        num_coupling_ac_el_faces, &
-                        coupling_ac_el_ispec,coupling_ac_el_ijk, &
-                        coupling_ac_el_normal, &
-                        coupling_ac_el_jacobian2Dw, &
-                        ispec_is_inner,phase_is_inner)
-
-        else
-          ! on GPU
-          call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, &
-                                           num_coupling_ac_el_faces)
-        endif ! GPU_MODE
-      endif ! num_coupling_ac_el_faces
-    endif
-
-
-! poroelastic coupling
-    if( POROELASTIC_SIMULATION ) then
-      call compute_coupling_elastic_po(NSPEC_AB,NGLOB_AB,ibool,&
-                        displs_poroelastic,displw_poroelastic,&
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_yy,hprime_zz,&
-                        kappaarraystore,rhoarraystore,mustore, &
-                        phistore,tortstore,jacobian,&
-                        displ,accel,kappastore, &
-                        ANISOTROPY,NSPEC_ANISO, &
-                        c11store,c12store,c13store,c14store,c15store,c16store,&
-                        c22store,c23store,c24store,c25store,c26store,c33store,&
-                        c34store,c35store,c36store,c44store,c45store,c46store,&
-                        c55store,c56store,c66store, &
-                        SIMULATION_TYPE,NGLOB_ADJOINT,NSPEC_ADJOINT, &
-                        num_coupling_el_po_faces, &
-                        coupling_el_po_ispec,coupling_po_el_ispec, &
-                        coupling_el_po_ijk,coupling_po_el_ijk, &
-                        coupling_el_po_normal, &
-                        coupling_el_po_jacobian2Dw, &
-                        ispec_is_inner,phase_is_inner)
-    endif
-
-! adds source term (single-force/moment-tensor solution)
-    call compute_add_sources_elastic( NSPEC_AB,NGLOB_AB,accel, &
-                        ibool,ispec_is_inner,phase_is_inner, &
-                        NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,&
-                        hdur,hdur_gaussian,tshift_src,dt,t0,sourcearrays, &
-                        ispec_is_elastic,SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
-                        nrec,islice_selected_rec,ispec_selected_rec, &
-                        nadj_rec_local,adj_sourcearrays,b_accel, &
-                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, &
-                        GPU_MODE, Mesh_pointer )
-
-    ! assemble all the contributions between slices using MPI
-    if( phase_is_inner .eqv. .false. ) then
-       ! sends accel values to corresponding MPI interface neighbors
-       if(.NOT. GPU_MODE) then
-          call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,accel, &
-               buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
-               num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-               nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
-               my_neighbours_ext_mesh, &
-               request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
-       else ! GPU_MODE==1
-          ! transfers boundary region to host asynchronously. The
-          ! MPI-send is done from within compute_forces_elastic_cuda,
-          ! once the inner element kernels are launched, and the
-          ! memcpy has finished. see compute_forces_elastic_cuda:1655
-          call transfer_boundary_from_device_a(Mesh_pointer,nspec_outer_elastic)
-       endif ! GPU_MODE
-
-       ! adjoint simulations
-       if( SIMULATION_TYPE == 3 ) then
-          if(.NOT. GPU_MODE) then
-             call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_accel, &
-                  b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
-                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                  nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
-                  my_neighbours_ext_mesh, &
-                  b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
-          else ! GPU_MODE == 1
-             call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
-                       b_buffer_send_vector_ext_mesh,&
-                       num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
-                       nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
-             call assemble_MPI_vector_send_cuda(NPROC, &
-                  b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
-                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
-                  nibool_interfaces_ext_mesh,&
-                  my_neighbours_ext_mesh, &
-                  b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
-          endif ! GPU
-       endif !adjoint
-
-    else
-      ! waits for send/receive requests to be completed and assembles values
-      if(.NOT. GPU_MODE) then
-         call assemble_MPI_vector_ext_mesh_w_ordered(NPROC,NGLOB_AB,accel, &
-                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                            max_nibool_interfaces_ext_mesh, &
-                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh, &
-                            my_neighbours_ext_mesh,myrank)
-
-      else ! GPU_MODE == 1
-         call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
-                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                            max_nibool_interfaces_ext_mesh, &
-                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
-      endif
-      ! adjoint simulations
-      if( SIMULATION_TYPE == 3 ) then
-         if(.NOT. GPU_MODE) then
-            call assemble_MPI_vector_ext_mesh_w_ordered(NPROC,NGLOB_ADJOINT,b_accel, &
-                             b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                             max_nibool_interfaces_ext_mesh, &
-                             nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                             b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh, &
-                             my_neighbours_ext_mesh,myrank)
-
-         else ! GPU_MODE == 1
-            call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
-                              b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
-                              max_nibool_interfaces_ext_mesh, &
-                              nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
-                              b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
-         endif
-      endif !adjoint
-
-    endif
-
-    !! DK DK May 2009: removed this because now each slice of a CUBIT + SCOTCH mesh
-    !! DK DK May 2009: has a different number of spectral elements and therefore
-    !! DK DK May 2009: only the general non-blocking MPI routines assemble_MPI_vector_ext_mesh_s
-    !! DK DK May 2009: and assemble_MPI_vector_ext_mesh_w above can be used.
-    !! DK DK May 2009: For adjoint runs below (SIMULATION_TYPE == 3) they should be used as well.
-
- enddo
-
-!Percy , Fault boundary term B*tau is added to the assembled forces 
-!        which at this point are stored in the array 'accel'
-  if (SIMULATION_TYPE_DYN) call bc_dynflt_set3d_all(accel,veloc,displ)
-  
-  if (SIMULATION_TYPE_KIN) call bc_kinflt_set_all(accel,veloc,displ)
-
- ! multiplies with inverse of mass matrix (note: rmass has been inverted already)
- if(.NOT. GPU_MODE) then
-    accel(1,:) = accel(1,:)*rmassx(:)
-    accel(2,:) = accel(2,:)*rmassy(:)
-    accel(3,:) = accel(3,:)*rmassz(:)
-    ! adjoint simulations
-    if (SIMULATION_TYPE == 3) then
-       b_accel(1,:) = b_accel(1,:)*rmassx(:)
-       b_accel(2,:) = b_accel(2,:)*rmassy(:)
-       b_accel(3,:) = b_accel(3,:)*rmassz(:)
-    endif !adjoint
- else ! GPU_MODE == 1
-    call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,b_deltatover2,OCEANS)
- endif
-
-! updates acceleration with ocean load term
-  if(OCEANS) then
-    if( .NOT. GPU_MODE ) then
-      call compute_coupling_ocean(NSPEC_AB,NGLOB_AB, &
-                                  ibool,rmassx,rmassy,rmassz, &
-                                  rmass_ocean_load,accel, &
-                                  free_surface_normal,free_surface_ijk,free_surface_ispec, &
-                                  num_free_surface_faces,SIMULATION_TYPE, &
-                                  NGLOB_ADJOINT,b_accel)
-    else
-      ! on GPU
-      call compute_coupling_ocean_cuda(Mesh_pointer)
-    endif
-  endif
-
-! updates velocities
-! Newmark finite-difference time scheme with elastic domains:
-! (see e.g. Hughes, 1987; Chaljub et al., 2003)
-!
-! u(t+delta_t) = u(t) + delta_t  v(t) + 1/2  delta_t**2 a(t)
-! v(t+delta_t) = v(t) + 1/2 delta_t a(t) + 1/2 delta_t a(t+delta_t)
-! a(t+delta_t) = 1/M_elastic ( -K_elastic u(t+delta) + B_elastic chi_dot_dot(t+delta_t) + f( t+delta_t) )
-!
-! where
-!   u, v, a are displacement,velocity & acceleration
-!   M is mass matrix, K stiffness matrix and B boundary term for acoustic/elastic domains
-!   f denotes a source term (acoustic/elastic)
-!   chi_dot_dot is acoustic (fluid) potential ( dotted twice with respect to time)
-!
-! corrector:
-!   updates the velocity term which requires a(t+delta)
-! GPU_MODE: this is handled in 'kernel_3' at the same time as accel*rmass
-  if(.NOT. GPU_MODE) then
-     veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
-     ! adjoint simulations
-     if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
-  else ! GPU_MODE == 1
-    if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,b_deltatover2)
-  endif
-
-
-end subroutine compute_forces_elastic
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! distributes routines according to chosen NGLLX in constants.h
-
-!daniel: note -- i put it here rather than in compute_forces_elastic_Dev.f90 because compiler complains that:
-! " The storage extent of the dummy argument exceeds that of the actual argument. "
-
-subroutine compute_forces_elastic_Dev_sim1(iphase)
-
-! forward simulations
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-
-  implicit none
-
-  integer,intent(in) :: iphase
-
-  select case(NGLLX)
-
-  case (5)
-
-!----------------------------------------------------------------------------------------------
-
-! OpenMP routine flag for testing & benchmarking forward runs only
-! configure additional flag, e.g.: FLAGS_NO_CHECK="-O3 -DOPENMP_MODE -openmp"
-
-!----------------------------------------------------------------------------------------------
-#ifdef OPENMP_MODE
-    call compute_forces_elastic_Dev_openmp(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
-           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-           hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-           wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-           kappastore,mustore,jacobian,ibool, &
-           ATTENUATION,deltat, &
-           one_minus_sum_beta,factor_common, &
-           alphaval,betaval,gammaval, &
-           NSPEC_ATTENUATION_AB, &
-           R_xx,R_yy,R_xy,R_xz,R_yz, &
-           epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-           epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-           ANISOTROPY,NSPEC_ANISO, &
-           c11store,c12store,c13store,c14store,c15store,c16store,&
-           c22store,c23store,c24store,c25store,c26store,c33store,&
-           c34store,c35store,c36store,c44store,c45store,c46store,&
-           c55store,c56store,c66store, &
-           SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-           NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-           is_moho_top,is_moho_bot, &
-           dsdx_top,dsdx_bot, &
-           ispec2D_moho_top,ispec2D_moho_bot, &
-           num_phase_ispec_elastic,&
-           phase_ispec_inner_elastic,&
-           num_colors_outer_elastic,num_colors_inner_elastic)
-#else
-    call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
-             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-             hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-             wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-             kappastore,mustore,jacobian,ibool, &
-             ATTENUATION,deltat, &
-             one_minus_sum_beta,factor_common, &
-             alphaval,betaval,gammaval, &
-             NSPEC_ATTENUATION_AB, &
-             R_xx,R_yy,R_xy,R_xz,R_yz, &
-             epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-             epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-             ANISOTROPY,NSPEC_ANISO, &
-             c11store,c12store,c13store,c14store,c15store,c16store,&
-             c22store,c23store,c24store,c25store,c26store,c33store,&
-             c34store,c35store,c36store,c44store,c45store,c46store,&
-             c55store,c56store,c66store, &
-             SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-             NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-             is_moho_top,is_moho_bot, &
-             dsdx_top,dsdx_bot, &
-             ispec2D_moho_top,ispec2D_moho_bot, &
-             num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-             phase_ispec_inner_elastic )
-#endif
-
-  case default
-
-    stop 'error no Deville routine available for chosen NGLLX'
-
-  end select
-
-end subroutine compute_forces_elastic_Dev_sim1
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
-subroutine compute_forces_elastic_Dev_sim3(iphase)
-
-! uses backward/reconstructed displacement and acceleration arrays
-
-  use specfem_par
-  use specfem_par_acoustic
-  use specfem_par_elastic
-  use specfem_par_poroelastic
-
-  implicit none
-
-  integer,intent(in) :: iphase
-
-  select case(NGLLX)
-
-  case (5)
-    call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB, &
-                  b_displ,b_veloc,b_accel, &
-                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
-                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                  kappastore,mustore,jacobian,ibool, &
-                  ATTENUATION,deltat, &
-                  one_minus_sum_beta,factor_common, &
-                  b_alphaval,b_betaval,b_gammaval, &
-                  NSPEC_ATTENUATION_AB, &
-                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
-                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
-                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
-                  ANISOTROPY,NSPEC_ANISO, &
-                  c11store,c12store,c13store,c14store,c15store,c16store,&
-                  c22store,c23store,c24store,c25store,c26store,c33store,&
-                  c34store,c35store,c36store,c44store,c45store,c46store,&
-                  c55store,c56store,c66store, &
-                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
-                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
-                  is_moho_top,is_moho_bot, &
-                  b_dsdx_top,b_dsdx_bot, &
-                  ispec2D_moho_top,ispec2D_moho_bot, &
-                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
-                  phase_ispec_inner_elastic )
-
-  case default
-
-    stop 'error no Deville routine available for chosen NGLLX'
-
-  end select
-
-
-end subroutine compute_forces_elastic_Dev_sim3
-

Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev_openmp.f90	2013-01-16 17:48:05 UTC (rev 21238)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev_openmp.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -1,982 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 1
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-!                            October 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! OpenMP Threaded variant by John Levesque, Max Rietmann and Olaf Schenk
-
-  subroutine compute_forces_elastic_Dev_openmp(iphase ,NSPEC_AB,NGLOB_AB, &
-                             displ,veloc,accel, &
-                             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                             hprime_xx,hprime_xxT, &
-                             hprimewgll_xx,hprimewgll_xxT, &
-                             wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
-                             kappastore,mustore,jacobian,ibool, &
-                             ATTENUATION,deltat, &
-                             one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
-                             NSPEC_ATTENUATION_AB, &
-                             R_xx,R_yy,R_xy,R_xz,R_yz, &
-                             epsilondev_xx,epsilondev_yy,epsilondev_xy, &
-                             epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
-                             ANISOTROPY,NSPEC_ANISO, &
-                             c11store,c12store,c13store,c14store,c15store,c16store,&
-                             c22store,c23store,c24store,c25store,c26store,c33store,&
-                             c34store,c35store,c36store,c44store,c45store,c46store,&
-                             c55store,c56store,c66store, &
-                             SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
-                             NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
-                             is_moho_top,is_moho_bot, &
-                             dsdx_top,dsdx_bot, &
-                             ispec2D_moho_top,ispec2D_moho_bot, &
-                             num_phase_ispec_elastic,&
-                             phase_ispec_inner_elastic,&
-                             num_colors_outer_elastic,num_colors_inner_elastic)
-
-
-
-  ! computes elastic tensor term
-
-  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
-       N_SLS,SAVE_MOHO_MESH, &
-       ONE_THIRD,FOUR_THIRDS,m1,m2
-
-  ! Trying to pass these variables as subroutine arguments ran into
-  ! problems, so we reference them from their module, making them
-  ! accessible from this subroutine
-  use specfem_par_elastic, only:dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3, &
-       newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3, &
-       tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3,num_elem_colors_elastic, &
-       dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,tempx1_att,tempx2_att,tempx3_att, &
-       tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
-
-  use fault_solver_dynamic, only : Kelvin_Voigt_eta
-
-  implicit none
-
-  integer :: NSPEC_AB,NGLOB_AB
-
-  ! displacement and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,accel
-
-
-  ! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-       xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-       kappastore,mustore,jacobian
-
-  ! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-  ! memory variables and standard linear solids for attenuation
-  logical :: ATTENUATION
-  logical :: COMPUTE_AND_STORE_STRAIN
-  integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
-  integer :: NSPEC_ATTENUATION_AB
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
-  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
-  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
-       R_xx,R_yy,R_xy,R_xz,R_yz
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
-       epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
-  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
-
-  ! anisotropy
-  logical :: ANISOTROPY
-  integer :: NSPEC_ANISO
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
-       c11store,c12store,c13store,c14store,c15store,c16store, &
-       c22store,c23store,c24store,c25store,c26store,c33store, &
-       c34store,c35store,c36store,c44store,c45store,c46store, &
-       c55store,c56store,c66store
-
-  integer :: iphase
-  integer :: num_phase_ispec_elastic
-  integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
-
-  ! adjoint simulations
-  integer :: SIMULATION_TYPE
-  integer :: NSPEC_BOUN,NSPEC2D_MOHO
-
-  ! moho kernel
-  real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
-       dsdx_top,dsdx_bot
-  logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
-  integer :: ispec2D_moho_top, ispec2D_moho_bot
-
-  ! local attenuation parameters
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_xx_loc, &
-       epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
-  real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
-  real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
-  real(kind=CUSTOM_REAL) Sn,Snp1
-  real(kind=CUSTOM_REAL) templ
-
-  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
-
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-
-  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
-
-  real(kind=CUSTOM_REAL) fac1,fac2,fac3
-
-  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
-  real(kind=CUSTOM_REAL) kappal
-
-  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
-  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att;
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att;
-
-  integer OMP_get_thread_num
-  integer OMP_GET_MAX_THREADS
-
-  ! timing
-  !double precision omp_get_wtime
-  !double precision start_time
-  !double precision end_time
-  !double precision accumulate_time_start
-  !double precision accumulate_time_stop
-
-  ! local anisotropy parameters
-  real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
-       c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
-
-  integer i_SLS,imodulo_N_SLS
-  integer ispec,iglob,ispec_p,num_elements
-  integer i,j,k
-  integer thread_id
-  integer NUM_THREADS
-  !integer omp_get_num_threads ! function
-
-  ! coloring additions
-  ! integer, dimension(:), allocatable :: num_elem_colors_elastic
-  integer istart, estart, number_of_colors
-  integer num_colors_outer_elastic, num_colors_inner_elastic
-  integer icolor
-
-  real(kind=CUSTOM_REAL) :: eta
-
-  ! write(*,*) "num_elem_colors_elastic(1) = ",num_elem_colors_elastic(1)
-  imodulo_N_SLS = mod(N_SLS,3)
-
-  ! NUM_THREADS = 1
-  NUM_THREADS = OMP_GET_MAX_THREADS()
-
-
-  ! choses inner/outer elements
-  if( iphase == 1 ) then
-     number_of_colors = num_colors_outer_elastic
-     istart = 1
-  else
-     number_of_colors = num_colors_inner_elastic + num_colors_outer_elastic
-     istart = num_colors_outer_elastic+1
-     ! istart = num_colors_outer_elastic
-  endif
-
-  ! "start" timer
-  ! start_time = omp_get_wtime()
-
-  ! The mesh coloring algorithm provides disjoint sets of elements that
-  ! do not share degrees of freedom which is required for the assembly
-  ! step at the "accel(iglob) += update" step. The coloring is
-  ! implemented, such that the element and node indices are ordered by
-  ! color. This requires then only to iterate through the elements in
-  ! order, stopping to synchronize threads after all the elements in a
-  ! color are finished.
-  estart = 1
-  do icolor = istart, number_of_colors
-
-    !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(&
-    !$OMP R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3,&
-    !$OMP factor_loc,alphaval_loc,betaval_loc,gammaval_loc,&
-    !$OMP Sn,Snp1,&
-    !$OMP templ,&
-    !$OMP xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl,&
-    !$OMP duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl,&
-    !$OMP duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl,&
-    !$OMP duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl,&
-    !$OMP sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,&
-    !$OMP fac1,fac2,fac3,&
-    !$OMP lambdal,mul,lambdalplus2mul,kappal,&
-    !$OMP c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
-    !$OMP c33,c34,c35,c36,c44,c45,c46,c55,c56,c66,&
-    !$OMP i_SLS,&
-    !$OMP ispec,iglob,ispec_p,&
-    !$OMP i,j,k,&
-    !$OMP thread_id)
-
-    thread_id = OMP_get_thread_num()+1
-
-    ! we retrive the subset of the total elements determined by the mesh
-    ! coloring. This number changes as we iterate through the colors
-    num_elements = num_elem_colors_elastic(icolor)
-    !$OMP DO
-    do ispec_p = estart,(estart-1)+num_elements
-
-
-       ! returns element id from stored element list
-       ispec = phase_ispec_inner_elastic(ispec_p,iphase)
-
-       ! adjoint simulations: moho kernel
-       if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
-          if (is_moho_top(ispec)) then
-             ispec2D_moho_top = ispec2D_moho_top + 1
-          else if (is_moho_bot(ispec)) then
-             ispec2D_moho_bot = ispec2D_moho_bot + 1
-          endif
-       endif ! adjoint
-
-       ! Kelvin Voigt damping: artificial viscosity around dynamic faults
-
-        ! stores displacment values in local array
-        if (allocated(Kelvin_Voigt_eta)) then
-          eta = Kelvin_Voigt_eta(ispec)   
-          do k=1,NGLLZ
-            do j=1,NGLLY
-              do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc(i,j,k,thread_id) = displ(1,iglob) + eta*veloc(1,iglob)
-                dummyy_loc(i,j,k,thread_id) = displ(2,iglob) + eta*veloc(2,iglob)
-                dummyz_loc(i,j,k,thread_id) = displ(3,iglob) + eta*veloc(3,iglob)
-              enddo
-            enddo
-          enddo
-
-        else
-          do k=1,NGLLZ
-            do j=1,NGLLY
-              do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc(i,j,k,thread_id) = displ(1,iglob)
-                dummyy_loc(i,j,k,thread_id) = displ(2,iglob)
-                dummyz_loc(i,j,k,thread_id) = displ(3,iglob)
-              enddo
-            enddo
-          enddo
-        endif
-
-       ! stores velocity values in local array
-       do k=1,NGLLZ
-          do j=1,NGLLY
-             do i=1,NGLLX
-                iglob = ibool(i,j,k,ispec)
-                dummyx_loc_att(i,j,k,thread_id) = veloc(1,iglob)
-                dummyy_loc_att(i,j,k,thread_id) = veloc(2,iglob)
-                dummyz_loc_att(i,j,k,thread_id) = veloc(3,iglob)
-             enddo
-          enddo
-       enddo
-
-       ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-       ! for incompressible fluid flow, Cambridge University Press (2002),
-       ! pages 386 and 389 and Figure 8.3.1
-       ! call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
-       do j=1,m2
-          do i=1,m1
-             tempx1(i,j,1,thread_id) = &
-                  hprime_xx(i,1)*dummyx_loc(1,j,1,thread_id) + &
-                  hprime_xx(i,2)*dummyx_loc(2,j,1,thread_id) + &
-                  hprime_xx(i,3)*dummyx_loc(3,j,1,thread_id) + &
-                  hprime_xx(i,4)*dummyx_loc(4,j,1,thread_id) + &
-                  hprime_xx(i,5)*dummyx_loc(5,j,1,thread_id)
-             tempy1(i,j,1,thread_id) = &
-                  hprime_xx(i,1)*dummyy_loc(1,j,1,thread_id) + &
-                  hprime_xx(i,2)*dummyy_loc(2,j,1,thread_id) + &
-                  hprime_xx(i,3)*dummyy_loc(3,j,1,thread_id) + &
-                  hprime_xx(i,4)*dummyy_loc(4,j,1,thread_id) + &
-                  hprime_xx(i,5)*dummyy_loc(5,j,1,thread_id)
-             tempz1(i,j,1,thread_id) = &
-                  hprime_xx(i,1)*dummyz_loc(1,j,1,thread_id) + &
-                  hprime_xx(i,2)*dummyz_loc(2,j,1,thread_id) + &
-                  hprime_xx(i,3)*dummyz_loc(3,j,1,thread_id) + &
-                  hprime_xx(i,4)*dummyz_loc(4,j,1,thread_id) + &
-                  hprime_xx(i,5)*dummyz_loc(5,j,1,thread_id)
-          enddo
-       enddo
-
-       !   call mxm_m1_m1_5points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
-       !          hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
-       do j=1,m1
-          do i=1,m1
-             ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-             do k = 1,NGLLX
-                tempx2(i,j,k,thread_id) = dummyx_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
-                     dummyx_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
-                     dummyx_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
-                     dummyx_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
-                     dummyx_loc(i,5,k,thread_id)*hprime_xxT(5,j)
-                tempy2(i,j,k,thread_id) = dummyy_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
-                     dummyy_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
-                     dummyy_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
-                     dummyy_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
-                     dummyy_loc(i,5,k,thread_id)*hprime_xxT(5,j)
-                tempz2(i,j,k,thread_id) = dummyz_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
-                     dummyz_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
-                     dummyz_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
-                     dummyz_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
-                     dummyz_loc(i,5,k,thread_id)*hprime_xxT(5,j)
-             enddo
-          enddo
-       enddo
-
-       ! call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
-       do j=1,m1
-          do i=1,m2
-             tempx3(i,1,j,thread_id) = &
-                  dummyx_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
-                  dummyx_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
-                  dummyx_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
-                  dummyx_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
-                  dummyx_loc(i,1,5,thread_id)*hprime_xxT(5,j)
-             tempy3(i,1,j,thread_id) = &
-                  dummyy_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
-                  dummyy_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
-                  dummyy_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
-                  dummyy_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
-                  dummyy_loc(i,1,5,thread_id)*hprime_xxT(5,j)
-             tempz3(i,1,j,thread_id) = &
-                  dummyz_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
-                  dummyz_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
-                  dummyz_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
-                  dummyz_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
-                  dummyz_loc(i,1,5,thread_id)*hprime_xxT(5,j)
-
-          enddo
-       enddo
-
-       if( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
-          do j=1,m2
-             do i=1,m1
-                tempx1_att(i,j,1,thread_id) = tempx1(i,j,1,thread_id)
-                tempy1_att(i,j,1,thread_id) = tempy1(i,j,1,thread_id)
-                tempz1_att(i,j,1,thread_id) = tempz1(i,j,1,thread_id)
-             enddo
-          enddo
-
-          do j=1,m1
-             do i=1,m1
-                do k=1,NGLLX
-                   tempx2_att(i,j,k,thread_id) = tempx2(i,j,k,thread_id)
-                   tempy2_att(i,j,k,thread_id) = tempy2(i,j,k,thread_id)
-                   tempz2_att(i,j,k,thread_id) = tempz2(i,j,k,thread_id)
-                enddo
-             enddo
-          enddo
-
-          do j=1,m1
-             do i=1,m2
-                tempx3_att(i,1,j,thread_id) = tempx3(i,1,j,thread_id)
-                tempy3_att(i,1,j,thread_id) = tempy3(i,1,j,thread_id)
-                tempz3_att(i,1,j,thread_id) = tempz3(i,1,j,thread_id)
-             enddo
-          enddo
-
-          ! use first order Taylor expansion of displacement for local storage of stresses 
-          ! at this current time step, to fix attenuation in a consistent way
-          do j=1,m2
-             do i=1,m1
-                tempx1l_att(i,j,1,thread_id) = tempx1l_att(i,j,1,thread_id) + &
-                     deltat*hprime_xx(i,1)*dummyx_loc_att(1,j,1,thread_id) + &
-                     deltat*hprime_xx(i,2)*dummyx_loc_att(2,j,1,thread_id) + &
-                     deltat*hprime_xx(i,3)*dummyx_loc_att(3,j,1,thread_id) + &
-                     deltat*hprime_xx(i,4)*dummyx_loc_att(4,j,1,thread_id) + &
-                     deltat*hprime_xx(i,5)*dummyx_loc_att(5,j,1,thread_id)
-
-                tempy1l_att(i,j,1,thread_id) = tempy1l_att(i,j,1,thread_id) + &
-                     deltat*hprime_xx(i,1)*dummyy_loc_att(1,j,1,thread_id) + &
-                     deltat*hprime_xx(i,2)*dummyy_loc_att(2,j,1,thread_id) + &
-                     deltat*hprime_xx(i,3)*dummyy_loc_att(3,j,1,thread_id) + &
-                     deltat*hprime_xx(i,4)*dummyy_loc_att(4,j,1,thread_id) + &
-                     deltat*hprime_xx(i,5)*dummyy_loc_att(5,j,1,thread_id)
-
-                tempz1l_att(i,j,1,thread_id) = tempz1l_att(i,j,1,thread_id) + &
-                     deltat*hprime_xx(i,1)*dummyz_loc_att(1,j,1,thread_id) + &
-                     deltat*hprime_xx(i,2)*dummyz_loc_att(2,j,1,thread_id) + &
-                     deltat*hprime_xx(i,3)*dummyz_loc_att(3,j,1,thread_id) + &
-                     deltat*hprime_xx(i,4)*dummyz_loc_att(4,j,1,thread_id) + &
-                     deltat*hprime_xx(i,5)*dummyz_loc_att(5,j,1,thread_id)
-             enddo
-          enddo
-
-          do j=1,m1
-             do i=1,m1
-                do k=1,NGLLX
-                   tempx2l_att(i,j,k,thread_id) = tempx2l_att(i,j,k,thread_id) + &
-                        deltat*dummyx_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
-                        deltat*dummyx_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
-                        deltat*dummyx_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
-                        deltat*dummyx_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
-                        deltat*dummyx_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
-
-                   tempy2l_att(i,j,k,thread_id) = tempy2l_att(i,j,k,thread_id) + &
-                        deltat*dummyy_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
-                        deltat*dummyy_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
-                        deltat*dummyy_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
-                        deltat*dummyy_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
-                        deltat*dummyy_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
-
-                   tempz2l_att(i,j,k,thread_id) = tempz2l_att(i,j,k,thread_id) + &
-                        deltat*dummyz_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
-                        deltat*dummyz_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
-                        deltat*dummyz_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
-                        deltat*dummyz_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
-                        deltat*dummyz_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
-                enddo
-             enddo
-          enddo
-
-          do j=1,m1
-             do i=1,m2
-                tempx3l_att(i,1,j,thread_id) = tempx3l_att(i,1,j,thread_id) + &
-                     deltat*dummyx_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
-                     deltat*dummyx_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
-                     deltat*dummyx_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
-                     deltat*dummyx_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
-                     deltat*dummyx_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
-
-                tempy3l_att(i,1,j,thread_id) = tempy3l_att(i,1,j,thread_id) + &
-                     deltat*dummyy_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
-                     deltat*dummyy_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
-                     deltat*dummyy_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
-                     deltat*dummyy_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
-                     deltat*dummyy_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
-
-                tempz3l_att(i,1,j,thread_id) = tempz3l_att(i,1,j,thread_id) + &
-                     deltat*dummyz_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
-                     deltat*dummyz_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
-                     deltat*dummyz_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
-                     deltat*dummyz_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
-                     deltat*dummyz_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
-             enddo
-          endif
-       endif
-
-       do k=1,NGLLZ
-          do j=1,NGLLY
-             do i=1,NGLLX
-                ! get derivatives of ux, uy and uz with respect to x, y and z
-                xixl = xix(i,j,k,ispec)
-                xiyl = xiy(i,j,k,ispec)
-                xizl = xiz(i,j,k,ispec)
-                etaxl = etax(i,j,k,ispec)
-                etayl = etay(i,j,k,ispec)
-                etazl = etaz(i,j,k,ispec)
-                gammaxl = gammax(i,j,k,ispec)
-                gammayl = gammay(i,j,k,ispec)
-                gammazl = gammaz(i,j,k,ispec)
-                jacobianl = jacobian(i,j,k,ispec)
-
-                duxdxl = xixl*tempx1(i,j,k,thread_id) + etaxl*tempx2(i,j,k,thread_id) + gammaxl*tempx3(i,j,k,thread_id)
-                duxdyl = xiyl*tempx1(i,j,k,thread_id) + etayl*tempx2(i,j,k,thread_id) + gammayl*tempx3(i,j,k,thread_id)
-                duxdzl = xizl*tempx1(i,j,k,thread_id) + etazl*tempx2(i,j,k,thread_id) + gammazl*tempx3(i,j,k,thread_id)
-
-                duydxl = xixl*tempy1(i,j,k,thread_id) + etaxl*tempy2(i,j,k,thread_id) + gammaxl*tempy3(i,j,k,thread_id)
-                duydyl = xiyl*tempy1(i,j,k,thread_id) + etayl*tempy2(i,j,k,thread_id) + gammayl*tempy3(i,j,k,thread_id)
-                duydzl = xizl*tempy1(i,j,k,thread_id) + etazl*tempy2(i,j,k,thread_id) + gammazl*tempy3(i,j,k,thread_id)
-
-                duzdxl = xixl*tempz1(i,j,k,thread_id) + etaxl*tempz2(i,j,k,thread_id) + gammaxl*tempz3(i,j,k,thread_id)
-                duzdyl = xiyl*tempz1(i,j,k,thread_id) + etayl*tempz2(i,j,k,thread_id) + gammayl*tempz3(i,j,k,thread_id)
-                duzdzl = xizl*tempz1(i,j,k,thread_id) + etazl*tempz2(i,j,k,thread_id) + gammazl*tempz3(i,j,k,thread_id)
-
-                ! save strain on the Moho boundary
-                if (SAVE_MOHO_MESH ) then
-                   if (is_moho_top(ispec)) then
-                      dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
-                      dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
-                      dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
-                      dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
-                      dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
-                      dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
-                      dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
-                      dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
-                      dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
-                   else if (is_moho_bot(ispec)) then
-                      dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
-                      dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
-                      dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
-                      dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
-                      dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
-                      dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
-                      dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
-                      dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
-                      dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
-                   endif
-                endif
-
-                ! precompute some sums to save CPU time
-                duxdxl_plus_duydyl = duxdxl + duydyl
-                duxdxl_plus_duzdzl = duxdxl + duzdzl
-                duydyl_plus_duzdzl = duydyl + duzdzl
-                duxdyl_plus_duydxl = duxdyl + duydxl
-                duzdxl_plus_duxdzl = duzdxl + duxdzl
-                duzdyl_plus_duydzl = duzdyl + duydzl
-                
-                if( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
-                   ! temporary variables used for fixing attenuation in a consistent way
-                   duxdxl_att = xixl*tempx1l_att(i,j,k,thread_id) + etaxl*tempx2l_att(i,j,k,thread_id) + &
-                        gammaxl*tempx3l_att(i,j,k,thread_id)
-                   duxdyl_att = xiyl*tempx1l_att(i,j,k,thread_id) + etayl*tempx2l_att(i,j,k,thread_id) + &
-                        gammayl*tempx3l_att(i,j,k,thread_id)
-                   duxdzl_att = xizl*tempx1l_att(i,j,k,thread_id) + etazl*tempx2l_att(i,j,k,thread_id) + &
-                        gammazl*tempx3l_att(i,j,k,thread_id)
-
-                   duydxl_att = xixl*tempy1l_att(i,j,k,thread_id) + etaxl*tempy2l_att(i,j,k,thread_id) + &
-                        gammaxl*tempy3l_att(i,j,k,thread_id)
-                   duydyl_att = xiyl*tempy1l_att(i,j,k,thread_id) + etayl*tempy2l_att(i,j,k,thread_id) + &
-                        gammayl*tempy3l_att(i,j,k,thread_id)
-                   duydzl_att = xizl*tempy1l_att(i,j,k,thread_id) + etazl*tempy2l_att(i,j,k,thread_id) + &
-                        gammazl*tempy3l_att(i,j,k,thread_id)
-
-                   duzdxl_att = xixl*tempz1l_att(i,j,k,thread_id) + etaxl*tempz2l_att(i,j,k,thread_id) + &
-                        gammaxl*tempz3l_att(i,j,k,thread_id)
-                   duzdyl_att = xiyl*tempz1l_att(i,j,k,thread_id) + etayl*tempz2l_att(i,j,k,thread_id) + &
-                        gammayl*tempz3l_att(i,j,k,thread_id)
-                   duzdzl_att = xizl*tempz1l_att(i,j,k,thread_id) + etazl*tempz2l_att(i,j,k,thread_id) + &
-                        gammazl*tempz3l_att(i,j,k,thread_id)
-
-                   ! precompute some sums to save CPU time
-                   duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
-                   duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
-                   duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
-
-                   ! compute deviatoric strain
-                   if( SIMULATION_TYPE == 3 ) &
-                        epsilon_trace_over_3(i,j,k,ispec) = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
-                   epsilondev_xx_loc(i,j,k) = duxdxl_att - epsilon_trace_over_3(i,j,k,ispec)
-                   epsilondev_yy_loc(i,j,k) = duydyl_att - epsilon_trace_over_3(i,j,k,ispec)
-                   epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
-                   epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
-                   epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
-                else
-                   ! computes deviatoric strain attenuation and/or for kernel calculations
-                   if (COMPUTE_AND_STORE_STRAIN) then
-                      templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-                      if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                      epsilondev_xx_loc(i,j,k) = duxdxl - templ
-                      epsilondev_yy_loc(i,j,k) = duydyl - templ
-                      epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
-                      epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
-                      epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
-                   endif
-                endif
-
-                kappal = kappastore(i,j,k,ispec)
-                mul = mustore(i,j,k,ispec)
-
-                ! attenuation
-                if(ATTENUATION) then
-                   ! use unrelaxed parameters if attenuation
-                   mul  = mul * one_minus_sum_beta(i,j,k,ispec)
-                endif
-
-                ! full anisotropic case, stress calculations
-                if(ANISOTROPY) then
-                   c11 = c11store(i,j,k,ispec)
-                   c12 = c12store(i,j,k,ispec)
-                   c13 = c13store(i,j,k,ispec)
-                   c14 = c14store(i,j,k,ispec)
-                   c15 = c15store(i,j,k,ispec)
-                   c16 = c16store(i,j,k,ispec)
-                   c22 = c22store(i,j,k,ispec)
-                   c23 = c23store(i,j,k,ispec)
-                   c24 = c24store(i,j,k,ispec)
-                   c25 = c25store(i,j,k,ispec)
-                   c26 = c26store(i,j,k,ispec)
-                   c33 = c33store(i,j,k,ispec)
-                   c34 = c34store(i,j,k,ispec)
-                   c35 = c35store(i,j,k,ispec)
-                   c36 = c36store(i,j,k,ispec)
-                   c44 = c44store(i,j,k,ispec)
-                   c45 = c45store(i,j,k,ispec)
-                   c46 = c46store(i,j,k,ispec)
-                   c55 = c55store(i,j,k,ispec)
-                   c56 = c56store(i,j,k,ispec)
-                   c66 = c66store(i,j,k,ispec)
-
-                   sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
-                        c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
-                   sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
-                        c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
-                   sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
-                        c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
-                   sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
-                        c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
-                   sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
-                        c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
-                   sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
-                        c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
-
-                else
-
-                   ! isotropic case
-                   lambdalplus2mul = kappal + FOUR_THIRDS * mul
-                   lambdal = lambdalplus2mul - 2.*mul
-
-                   ! compute stress sigma
-                   sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
-                   sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
-                   sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
-
-                   sigma_xy = mul*duxdyl_plus_duydxl
-                   sigma_xz = mul*duzdxl_plus_duxdzl
-                   sigma_yz = mul*duzdyl_plus_duydzl
-
-                endif ! ANISOTROPY
-
-                ! subtract memory variables if attenuation
-                if(ATTENUATION) then
-                   ! way 1
-                   !                do i_sls = 1,N_SLS
-                   !                  R_xx_val = R_xx(i,j,k,ispec,i_sls)
-                   !                  R_yy_val = R_yy(i,j,k,ispec,i_sls)
-                   !                  sigma_xx = sigma_xx - R_xx_val
-                   !                  sigma_yy = sigma_yy - R_yy_val
-                   !                  sigma_zz = sigma_zz + R_xx_val + R_yy_val
-                   !                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                   !                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                   !                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-                   !                enddo
-
-                   ! way 2
-                   ! note: this should help compilers to pipeline the code and make better use of the cache;
-                   !          depending on compilers, it can further decrease the computation time by ~ 30%.
-                   !          by default, N_SLS = 3, therefore we take steps of 3
-                   if(imodulo_N_SLS >= 1) then
-                      do i_sls = 1,imodulo_N_SLS
-                         R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                         R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                         sigma_xx = sigma_xx - R_xx_val1
-                         sigma_yy = sigma_yy - R_yy_val1
-                         sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                         sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                         sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                         sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-                      enddo
-                   endif
-
-                   if(N_SLS >= imodulo_N_SLS+1) then
-                      do i_sls = imodulo_N_SLS+1,N_SLS,3
-                         R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
-                         R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
-                         sigma_xx = sigma_xx - R_xx_val1
-                         sigma_yy = sigma_yy - R_yy_val1
-                         sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
-                         sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
-                         sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
-                         sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-
-                         R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
-                         R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
-                         sigma_xx = sigma_xx - R_xx_val2
-                         sigma_yy = sigma_yy - R_yy_val2
-                         sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
-                         sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
-                         sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
-                         sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
-
-                         R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
-                         R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
-                         sigma_xx = sigma_xx - R_xx_val3
-                         sigma_yy = sigma_yy - R_yy_val3
-                         sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
-                         sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
-                         sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
-                         sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
-                      enddo
-                   endif
-
-
-                endif
-
-                ! form dot product with test vector, symmetric form
-                tempx1(i,j,k,thread_id) = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl)
-                tempy1(i,j,k,thread_id) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_yz*xizl)
-                tempz1(i,j,k,thread_id) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl)
-
-                tempx2(i,j,k,thread_id) = jacobianl * (sigma_xx*etaxl + sigma_xy*etayl + sigma_xz*etazl)
-                tempy2(i,j,k,thread_id) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_yz*etazl)
-                tempz2(i,j,k,thread_id) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl)
-
-                tempx3(i,j,k,thread_id) = jacobianl * (sigma_xx*gammaxl + sigma_xy*gammayl + sigma_xz*gammazl)
-                tempy3(i,j,k,thread_id) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_yz*gammazl)
-                tempz3(i,j,k,thread_id) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl)
-
-             enddo
-          enddo
-       enddo
-
-       ! subroutines adapted from Deville, Fischer and Mund, High-order methods
-       ! for incompressible fluid flow, Cambridge University Press (2002),
-       ! pages 386 and 389 and Figure 8.3.1
-       ! call mxm_m1_m2_5points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
-       do j=1,m2
-          do i=1,m1
-             newtempx1(i,j,1,thread_id) = &
-                  hprimewgll_xxT(i,1)*tempx1(1,j,1,thread_id) + &
-                  hprimewgll_xxT(i,2)*tempx1(2,j,1,thread_id) + &
-                  hprimewgll_xxT(i,3)*tempx1(3,j,1,thread_id) + &
-                  hprimewgll_xxT(i,4)*tempx1(4,j,1,thread_id) + &
-                  hprimewgll_xxT(i,5)*tempx1(5,j,1,thread_id)
-             newtempy1(i,j,1,thread_id) = &
-                  hprimewgll_xxT(i,1)*tempy1(1,j,1,thread_id) + &
-                  hprimewgll_xxT(i,2)*tempy1(2,j,1,thread_id) + &
-                  hprimewgll_xxT(i,3)*tempy1(3,j,1,thread_id) + &
-                  hprimewgll_xxT(i,4)*tempy1(4,j,1,thread_id) + &
-                  hprimewgll_xxT(i,5)*tempy1(5,j,1,thread_id)
-             newtempz1(i,j,1,thread_id) = &
-                  hprimewgll_xxT(i,1)*tempz1(1,j,1,thread_id) + &
-                  hprimewgll_xxT(i,2)*tempz1(2,j,1,thread_id) + &
-                  hprimewgll_xxT(i,3)*tempz1(3,j,1,thread_id) + &
-                  hprimewgll_xxT(i,4)*tempz1(4,j,1,thread_id) + &
-                  hprimewgll_xxT(i,5)*tempz1(5,j,1,thread_id)
-          enddo
-       enddo
-
-       !   call mxm_m1_m1_5points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
-       !         hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
-       do i=1,m1
-          do j=1,m1
-             ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
-             do k = 1,NGLLX
-                newtempx2(i,j,k,thread_id) = tempx2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
-                     tempx2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
-                     tempx2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
-                     tempx2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
-                     tempx2(i,5,k,thread_id)*hprimewgll_xx(5,j)
-                newtempy2(i,j,k,thread_id) = tempy2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
-                     tempy2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
-                     tempy2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
-                     tempy2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
-                     tempy2(i,5,k,thread_id)*hprimewgll_xx(5,j)
-                newtempz2(i,j,k,thread_id) = tempz2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
-                     tempz2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
-                     tempz2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
-                     tempz2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
-                     tempz2(i,5,k,thread_id)*hprimewgll_xx(5,j)
-             enddo
-          enddo
-       enddo
-
-       ! call mxm_m2_m1_5points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
-       do j=1,m1
-          do i=1,m2
-             newtempx3(i,1,j,thread_id) = &
-                  tempx3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
-                  tempx3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
-                  tempx3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
-                  tempx3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
-                  tempx3(i,1,5,thread_id)*hprimewgll_xx(5,j)
-             newtempy3(i,1,j,thread_id) = &
-                  tempy3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
-                  tempy3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
-                  tempy3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
-                  tempy3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
-                  tempy3(i,1,5,thread_id)*hprimewgll_xx(5,j)
-             newtempz3(i,1,j,thread_id) = &
-                  tempz3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
-                  tempz3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
-                  tempz3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
-                  tempz3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
-                  tempz3(i,1,5,thread_id)*hprimewgll_xx(5,j)
-          enddo
-       enddo
-
-       do k=1,NGLLZ
-          do j=1,NGLLY
-             do i=1,NGLLX
-
-                fac1 = wgllwgll_yz(j,k)
-                fac2 = wgllwgll_xz(i,k)
-                fac3 = wgllwgll_xy(i,j)
-
-                ! sum contributions from each element to the global mesh using indirect addressing
-                iglob = ibool(i,j,k,ispec)
-
-                ! accel_omp(1,iglob,thread_id) = accel_omp(1,iglob,thread_id)&
-                !      - fac1*newtempx1(i,j,k,thread_id) - fac2*newtempx2(i,j,k,thread_id)&
-                !      - fac3*newtempx3(i,j,k,thread_id)
-                ! accel_omp(2,iglob,thread_id) = accel_omp(2,iglob,thread_id)&
-                !      - fac1*newtempy1(i,j,k,thread_id) - fac2*newtempy2(i,j,k,thread_id)&
-                !      - fac3*newtempy3(i,j,k,thread_id)
-                ! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id)&
-                !      - fac1*newtempz1(i,j,k,thread_id) - fac2*newtempz2(i,j,k,thread_id)&
-                !      - fac3*newtempz3(i,j,k,thread_id)
-
-                ! Assembly of shared degrees of freedom fixed through mesh coloring
-                !! !$OMP ATOMIC
-                accel(1,iglob) = accel(1,iglob) &
-                                  - (fac1*newtempx1(i,j,k,thread_id) &
-                                   + fac2*newtempx2(i,j,k,thread_id) &
-                                   + fac3*newtempx3(i,j,k,thread_id))
-                !! !$OMP ATOMIC
-                accel(2,iglob) = accel(2,iglob) &
-                                  - (fac1*newtempy1(i,j,k,thread_id) &
-                                   + fac2*newtempy2(i,j,k,thread_id) &
-                                   + fac3*newtempy3(i,j,k,thread_id))
-                !! !$OMP ATOMIC
-                accel(3,iglob) = accel(3,iglob) &
-                                  - (fac1*newtempz1(i,j,k,thread_id) &
-                                   + fac2*newtempz2(i,j,k,thread_id) &
-                                   + fac3*newtempz3(i,j,k,thread_id))
-
-                ! accel(1,iglob) = accel(1,iglob) - &
-                ! (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
-                ! accel(2,iglob) = accel(2,iglob) - &
-                ! (fac1*newtempy1(i,j,k,thread_id) + fac2*newtempy2(i,j,k,thread_id) + fac3*newtempy3(i,j,k,thread_id))
-                ! accel(3,iglob) = accel(3,iglob) - &
-                ! (fac1*newtempz1(i,j,k,thread_id) + fac2*newtempz2(i,j,k,thread_id) + fac3*newtempz3(i,j,k,thread_id))
-
-                ! accel_omp(1,iglob,thread_id) = accel_omp(1,iglob,thread_id) - fac1*newtempx1(i,j,k,thread_id) - &
-                !                   fac2*newtempx2(i,j,k,thread_id) - fac3*newtempx3(i,j,k,thread_id)
-                ! accel_omp(2,iglob,thread_id) = accel_omp(2,iglob,thread_id) - fac1*newtempy1(i,j,k,thread_id) - &
-                !                   fac2*newtempy2(i,j,k,thread_id) - fac3*newtempy3(i,j,k,thread_id)
-                ! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id) - fac1*newtempz1(i,j,k,thread_id) - &
-                !      fac2*newtempz2(i,j,k,thread_id) - fac3*newtempz3(i,j,k,thread_id)
-
-                !  update memory variables based upon the Runge-Kutta scheme
-                if(ATTENUATION) then
-
-                   ! use Runge-Kutta scheme to march in time
-                   do i_sls = 1,N_SLS
-
-                      factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
-
-                      alphaval_loc = alphaval(i_sls)
-                      betaval_loc = betaval(i_sls)
-                      gammaval_loc = gammaval(i_sls)
-
-                      ! term in xx
-                      Sn   = factor_loc * epsilondev_xx(i,j,k,ispec)
-                      Snp1   = factor_loc * epsilondev_xx_loc(i,j,k)
-                      R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
-                           betaval_loc * Sn + gammaval_loc * Snp1
-                      ! term in yy
-                      Sn   = factor_loc * epsilondev_yy(i,j,k,ispec)
-                      Snp1   = factor_loc * epsilondev_yy_loc(i,j,k)
-                      R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
-                           betaval_loc * Sn + gammaval_loc * Snp1
-                      ! term in zz not computed since zero trace
-                      ! term in xy
-                      Sn   = factor_loc * epsilondev_xy(i,j,k,ispec)
-                      Snp1   = factor_loc * epsilondev_xy_loc(i,j,k)
-                      R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
-                           betaval_loc * Sn + gammaval_loc * Snp1
-                      ! term in xz
-                      Sn   = factor_loc * epsilondev_xz(i,j,k,ispec)
-                      Snp1   = factor_loc * epsilondev_xz_loc(i,j,k)
-                      R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
-                           betaval_loc * Sn + gammaval_loc * Snp1
-                      ! term in yz
-                      Sn   = factor_loc * epsilondev_yz(i,j,k,ispec)
-                      Snp1   = factor_loc * epsilondev_yz_loc(i,j,k)
-                      R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
-                           betaval_loc * Sn + gammaval_loc * Snp1
-
-                   enddo   ! end of loop on memory variables
-
-                endif  !  end attenuation
-
-             enddo
-          enddo
-       enddo
-
-       ! save deviatoric strain for Runge-Kutta scheme
-       if ( COMPUTE_AND_STORE_STRAIN ) then
-          epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
-          epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
-          epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
-          epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
-          epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
-       endif
-
-    enddo  ! spectral element loop
-    !$OMP END DO
-    !$OMP END PARALLEL
-
-    ! The elements are in order of color. First we do color 1 elements,
-    ! then color 2, etc. The ispec has to moved to start at the next
-    ! color.
-    estart = estart + num_elements
-
-  enddo ! loop over colors
-
-
-  ! "stop" timer
-  ! end_time = omp_get_wtime()
-
-  ! write(*,*) "Total Elapsed time: ", (end_time-start_time) , "seconds. (Threads=",NUM_THREADS,")"
-  ! write(*,*) "Accumulate Elapsed time: ", (accumulate_time_stop-accumulate_time_start) , "seconds"
-
-
-  ! These are now allocated at the beginning and never deallocated
-  ! because the program just finishes at the end.
-
-  ! deallocate(dummyx_loc)
-  ! deallocate(dummyy_loc)
-  ! deallocate(dummyz_loc)
-  ! deallocate(dummyx_loc_att)
-  ! deallocate(dummyy_loc_att)
-  ! deallocate(dummyz_loc_att)
-  ! deallocate(newtempx1)
-  ! deallocate(newtempx2)
-  ! deallocate(newtempx3)
-  ! deallocate(newtempy1)
-  ! deallocate(newtempy2)
-  ! deallocate(newtempy3)
-  ! deallocate(newtempz1)
-  ! deallocate(newtempz2)
-  ! deallocate(newtempz3)
-  ! deallocate(tempx1)
-  ! deallocate(tempx2)
-  ! deallocate(tempx3)
-  ! deallocate(tempy1)
-  ! deallocate(tempy2)
-  ! deallocate(tempy3)
-  ! deallocate(tempz1)
-  ! deallocate(tempz2)
-  ! deallocate(tempz3)
-  ! deallocate(tempx1_att)
-  ! deallocate(tempx2_att)
-  ! deallocate(tempx3_att)
-  ! deallocate(tempy1_att)
-  ! deallocate(tempy2_att)
-  ! deallocate(tempy3_att)
-  ! deallocate(tempz1_att)
-  ! deallocate(tempz2_att)
-  ! deallocate(tempz3_att)
-
-  ! accel(:,:) = accel_omp(:,:,1)
-
-  end subroutine compute_forces_elastic_Dev_openmp
-
-

Copied: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_calling_routine.F90 (from rev 21238, seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic.F90)
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_calling_routine.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_calling_routine.F90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -0,0 +1,554 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 1
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+!                             July 2012
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! elastic solver
+
+subroutine compute_forces_elastic()
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+  use fault_solver_dynamic, only : bc_dynflt_set3d_all,SIMULATION_TYPE_DYN
+  use fault_solver_kinematic, only : bc_kinflt_set_all,SIMULATION_TYPE_KIN
+
+  implicit none
+
+  integer:: iphase
+  logical:: phase_is_inner
+
+! distinguishes two runs: for points on MPI interfaces, and points within the partitions
+  do iphase=1,2
+
+    !first for points on MPI interfaces
+    if( iphase == 1 ) then
+      phase_is_inner = .false.
+    else
+      phase_is_inner = .true.
+    endif
+
+
+! elastic term
+    if( .NOT. GPU_MODE ) then
+      if(USE_DEVILLE_PRODUCTS) then
+        ! uses Deville (2002) optimizations
+        call compute_forces_elastic_Dev_sim1(iphase)
+
+        ! adjoint simulations: backward/reconstructed wavefield
+        if( SIMULATION_TYPE == 3 ) &
+          call compute_forces_elastic_Dev_sim3(iphase)
+
+      else
+        ! no optimizations used
+        call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_yy,hprime_zz, &
+                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                        kappastore,mustore,jacobian,ibool, &
+                        ATTENUATION,deltat,PML_CONDITIONS, &
+                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
+                        ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
+                        one_minus_sum_beta,factor_common, &
+                        alphaval,betaval,gammaval,&
+                        NSPEC_ATTENUATION_AB, &
+                        R_xx,R_yy,R_xy,R_xz,R_yz, &
+                        epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                        epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                        ANISOTROPY,NSPEC_ANISO, &
+                        c11store,c12store,c13store,c14store,c15store,c16store, &
+                        c22store,c23store,c24store,c25store,c26store,c33store, &
+                        c34store,c35store,c36store,c44store,c45store,c46store, &
+                        c55store,c56store,c66store, &
+                        SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
+                        is_moho_top,is_moho_bot, &
+                        dsdx_top,dsdx_bot, &
+                        ispec2D_moho_top,ispec2D_moho_bot, &
+                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic, &
+                        phase_ispec_inner_elastic,ispec_is_elastic )
+
+        ! adjoint simulations: backward/reconstructed wavefield
+        if( SIMULATION_TYPE == 3 ) &
+          call compute_forces_elastic_noDev( iphase, NSPEC_AB,NGLOB_AB, &
+                        b_displ,b_veloc,b_accel, &
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_yy,hprime_zz, &
+                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                        kappastore,mustore,jacobian,ibool, &
+                        ATTENUATION,deltat,PML_CONDITIONS, &
+                        nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax,NSPEC2D_BOTTOM,NSPEC2D_TOP, &
+                        ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax,ibelm_bottom,ibelm_top, &
+                        one_minus_sum_beta,factor_common, &
+                        b_alphaval,b_betaval,b_gammaval, &
+                        NSPEC_ATTENUATION_AB, &
+                        b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                        b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                        b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                        ANISOTROPY,NSPEC_ANISO, &
+                        c11store,c12store,c13store,c14store,c15store,c16store, &
+                        c22store,c23store,c24store,c25store,c26store,c33store, &
+                        c34store,c35store,c36store,c44store,c45store,c46store, &
+                        c55store,c56store,c66store, &
+                        SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                        NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
+                        is_moho_top,is_moho_bot, &
+                        b_dsdx_top,b_dsdx_bot, &
+                        ispec2D_moho_top,ispec2D_moho_bot, &
+                        num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic, &
+                        phase_ispec_inner_elastic  )
+
+      endif
+
+    else
+      ! on GPU
+      ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
+      call compute_forces_elastic_cuda(Mesh_pointer, iphase, deltat, &
+                                      nspec_outer_elastic, &
+                                      nspec_inner_elastic, &
+                                      COMPUTE_AND_STORE_STRAIN,ATTENUATION,ANISOTROPY)
+
+      if(phase_is_inner .eqv. .true.) then
+         ! while Inner elements compute "Kernel_2", we wait for MPI to
+         ! finish and transfer the boundary terms to the device
+         ! asynchronously
+
+         !daniel: todo - this avoids calling the fortran vector send from CUDA routine
+         ! wait for asynchronous copy to finish
+         call sync_copy_from_device(Mesh_pointer,iphase,buffer_send_vector_ext_mesh)
+         ! sends mpi buffers
+         call assemble_MPI_vector_send_cuda(NPROC, &
+                  buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                  nibool_interfaces_ext_mesh,&
+                  my_neighbours_ext_mesh, &
+                  request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+
+         ! transfers mpi buffers onto GPU
+         call transfer_boundary_to_device(NPROC,Mesh_pointer,buffer_recv_vector_ext_mesh, &
+                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                  request_recv_vector_ext_mesh)
+      endif ! inner elements
+
+   endif ! GPU_MODE
+
+
+! adds elastic absorbing boundary term to acceleration (Stacey conditions)
+    if( ABSORBING_CONDITIONS ) then
+       call compute_stacey_elastic(NSPEC_AB,NGLOB_AB,accel, &
+                        ibool,ispec_is_inner,phase_is_inner, &
+                        abs_boundary_normal,abs_boundary_jacobian2Dw, &
+                        abs_boundary_ijk,abs_boundary_ispec, &
+                        num_abs_boundary_faces, &
+                        veloc,rho_vp,rho_vs, &
+                        ispec_is_elastic,SIMULATION_TYPE,SAVE_FORWARD, &
+                        NSTEP,it,NGLOB_ADJOINT,b_accel, &
+                        b_num_abs_boundary_faces,b_reclen_field,b_absorb_field,&
+                        GPU_MODE,Mesh_pointer)
+    endif
+
+
+! acoustic coupling
+    if( ACOUSTIC_SIMULATION ) then
+      if( num_coupling_ac_el_faces > 0 ) then
+        if( .NOT. GPU_MODE ) then
+          if( SIMULATION_TYPE == 1 ) then
+            ! forward definition: pressure=-potential_dot_dot
+            call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
+                        ibool,accel,potential_dot_dot_acoustic, &
+                        num_coupling_ac_el_faces, &
+                        coupling_ac_el_ispec,coupling_ac_el_ijk, &
+                        coupling_ac_el_normal, &
+                        coupling_ac_el_jacobian2Dw, &
+                        ispec_is_inner,phase_is_inner)
+          else
+            ! handles adjoint runs coupling between adjoint potential and adjoint elastic wavefield
+            ! adoint definition: pressure^\dagger=potential^\dagger
+            call compute_coupling_elastic_ac(NSPEC_AB,NGLOB_AB, &
+                              ibool,accel,-potential_acoustic_adj_coupling, &
+                              num_coupling_ac_el_faces, &
+                              coupling_ac_el_ispec,coupling_ac_el_ijk, &
+                              coupling_ac_el_normal, &
+                              coupling_ac_el_jacobian2Dw, &
+                              ispec_is_inner,phase_is_inner)
+          endif
+
+        ! adjoint simulations
+        if( SIMULATION_TYPE == 3 ) &
+          call compute_coupling_elastic_ac(NSPEC_ADJOINT,NGLOB_ADJOINT, &
+                        ibool,b_accel,b_potential_dot_dot_acoustic, &
+                        num_coupling_ac_el_faces, &
+                        coupling_ac_el_ispec,coupling_ac_el_ijk, &
+                        coupling_ac_el_normal, &
+                        coupling_ac_el_jacobian2Dw, &
+                        ispec_is_inner,phase_is_inner)
+
+        else
+          ! on GPU
+          call compute_coupling_el_ac_cuda(Mesh_pointer,phase_is_inner, &
+                                           num_coupling_ac_el_faces)
+        endif ! GPU_MODE
+      endif ! num_coupling_ac_el_faces
+    endif
+
+
+! poroelastic coupling
+    if( POROELASTIC_SIMULATION ) then
+      call compute_coupling_elastic_po(NSPEC_AB,NGLOB_AB,ibool,&
+                        displs_poroelastic,displw_poroelastic,&
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_yy,hprime_zz,&
+                        kappaarraystore,rhoarraystore,mustore, &
+                        phistore,tortstore,jacobian,&
+                        displ,accel,kappastore, &
+                        ANISOTROPY,NSPEC_ANISO, &
+                        c11store,c12store,c13store,c14store,c15store,c16store,&
+                        c22store,c23store,c24store,c25store,c26store,c33store,&
+                        c34store,c35store,c36store,c44store,c45store,c46store,&
+                        c55store,c56store,c66store, &
+                        SIMULATION_TYPE,NGLOB_ADJOINT,NSPEC_ADJOINT, &
+                        num_coupling_el_po_faces, &
+                        coupling_el_po_ispec,coupling_po_el_ispec, &
+                        coupling_el_po_ijk,coupling_po_el_ijk, &
+                        coupling_el_po_normal, &
+                        coupling_el_po_jacobian2Dw, &
+                        ispec_is_inner,phase_is_inner)
+    endif
+
+! adds source term (single-force/moment-tensor solution)
+    call compute_add_sources_elastic( NSPEC_AB,NGLOB_AB,accel, &
+                        ibool,ispec_is_inner,phase_is_inner, &
+                        NSOURCES,myrank,it,islice_selected_source,ispec_selected_source,&
+                        hdur,hdur_gaussian,tshift_src,dt,t0,sourcearrays, &
+                        ispec_is_elastic,SIMULATION_TYPE,NSTEP,NGLOB_ADJOINT, &
+                        nrec,islice_selected_rec,ispec_selected_rec, &
+                        nadj_rec_local,adj_sourcearrays,b_accel, &
+                        NTSTEP_BETWEEN_READ_ADJSRC,NOISE_TOMOGRAPHY, &
+                        GPU_MODE, Mesh_pointer )
+
+    ! assemble all the contributions between slices using MPI
+    if( phase_is_inner .eqv. .false. ) then
+       ! sends accel values to corresponding MPI interface neighbors
+       if(.NOT. GPU_MODE) then
+          call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB,accel, &
+               buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
+               num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+               nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+               my_neighbours_ext_mesh, &
+               request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
+       else ! GPU_MODE==1
+          ! transfers boundary region to host asynchronously. The
+          ! MPI-send is done from within compute_forces_elastic_cuda,
+          ! once the inner element kernels are launched, and the
+          ! memcpy has finished. see compute_forces_elastic_cuda:1655
+          call transfer_boundary_from_device_a(Mesh_pointer,nspec_outer_elastic)
+       endif ! GPU_MODE
+
+       ! adjoint simulations
+       if( SIMULATION_TYPE == 3 ) then
+          if(.NOT. GPU_MODE) then
+             call assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_ADJOINT,b_accel, &
+                  b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
+                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                  nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh,&
+                  my_neighbours_ext_mesh, &
+                  b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
+          else ! GPU_MODE == 1
+             call transfer_boun_accel_from_device(NGLOB_AB*NDIM, Mesh_pointer, b_accel,&
+                       b_buffer_send_vector_ext_mesh,&
+                       num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh,&
+                       nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh,3) ! <-- 3 == adjoint b_accel
+             call assemble_MPI_vector_send_cuda(NPROC, &
+                  b_buffer_send_vector_ext_mesh,b_buffer_recv_vector_ext_mesh, &
+                  num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                  nibool_interfaces_ext_mesh,&
+                  my_neighbours_ext_mesh, &
+                  b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh)
+          endif ! GPU
+       endif !adjoint
+
+    else
+      ! waits for send/receive requests to be completed and assembles values
+      if(.NOT. GPU_MODE) then
+         call assemble_MPI_vector_ext_mesh_w_ordered(NPROC,NGLOB_AB,accel, &
+                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                            max_nibool_interfaces_ext_mesh, &
+                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh, &
+                            my_neighbours_ext_mesh,myrank)
+
+      else ! GPU_MODE == 1
+         call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,accel, Mesh_pointer,&
+                            buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                            max_nibool_interfaces_ext_mesh, &
+                            nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh,1)
+      endif
+      ! adjoint simulations
+      if( SIMULATION_TYPE == 3 ) then
+         if(.NOT. GPU_MODE) then
+            call assemble_MPI_vector_ext_mesh_w_ordered(NPROC,NGLOB_ADJOINT,b_accel, &
+                             b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                             max_nibool_interfaces_ext_mesh, &
+                             nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                             b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh, &
+                             my_neighbours_ext_mesh,myrank)
+
+         else ! GPU_MODE == 1
+            call assemble_MPI_vector_write_cuda(NPROC,NGLOB_AB,b_accel, Mesh_pointer,&
+                              b_buffer_recv_vector_ext_mesh,num_interfaces_ext_mesh,&
+                              max_nibool_interfaces_ext_mesh, &
+                              nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                              b_request_send_vector_ext_mesh,b_request_recv_vector_ext_mesh,3)
+         endif
+      endif !adjoint
+
+    endif
+
+    !! DK DK May 2009: removed this because now each slice of a CUBIT + SCOTCH mesh
+    !! DK DK May 2009: has a different number of spectral elements and therefore
+    !! DK DK May 2009: only the general non-blocking MPI routines assemble_MPI_vector_ext_mesh_s
+    !! DK DK May 2009: and assemble_MPI_vector_ext_mesh_w above can be used.
+    !! DK DK May 2009: For adjoint runs below (SIMULATION_TYPE == 3) they should be used as well.
+
+ enddo
+
+!Percy , Fault boundary term B*tau is added to the assembled forces 
+!        which at this point are stored in the array 'accel'
+  if (SIMULATION_TYPE_DYN) call bc_dynflt_set3d_all(accel,veloc,displ)
+  
+  if (SIMULATION_TYPE_KIN) call bc_kinflt_set_all(accel,veloc,displ)
+
+ ! multiplies with inverse of mass matrix (note: rmass has been inverted already)
+ if(.NOT. GPU_MODE) then
+    accel(1,:) = accel(1,:)*rmassx(:)
+    accel(2,:) = accel(2,:)*rmassy(:)
+    accel(3,:) = accel(3,:)*rmassz(:)
+    ! adjoint simulations
+    if (SIMULATION_TYPE == 3) then
+       b_accel(1,:) = b_accel(1,:)*rmassx(:)
+       b_accel(2,:) = b_accel(2,:)*rmassy(:)
+       b_accel(3,:) = b_accel(3,:)*rmassz(:)
+    endif !adjoint
+ else ! GPU_MODE == 1
+    call kernel_3_a_cuda(Mesh_pointer, NGLOB_AB, deltatover2,b_deltatover2,OCEANS)
+ endif
+
+! updates acceleration with ocean load term
+  if(OCEANS) then
+    if( .NOT. GPU_MODE ) then
+      call compute_coupling_ocean(NSPEC_AB,NGLOB_AB, &
+                                  ibool,rmassx,rmassy,rmassz, &
+                                  rmass_ocean_load,accel, &
+                                  free_surface_normal,free_surface_ijk,free_surface_ispec, &
+                                  num_free_surface_faces,SIMULATION_TYPE, &
+                                  NGLOB_ADJOINT,b_accel)
+    else
+      ! on GPU
+      call compute_coupling_ocean_cuda(Mesh_pointer)
+    endif
+  endif
+
+! updates velocities
+! Newmark finite-difference time scheme with elastic domains:
+! (see e.g. Hughes, 1987; Chaljub et al., 2003)
+!
+! u(t+delta_t) = u(t) + delta_t  v(t) + 1/2  delta_t**2 a(t)
+! v(t+delta_t) = v(t) + 1/2 delta_t a(t) + 1/2 delta_t a(t+delta_t)
+! a(t+delta_t) = 1/M_elastic ( -K_elastic u(t+delta) + B_elastic chi_dot_dot(t+delta_t) + f( t+delta_t) )
+!
+! where
+!   u, v, a are displacement,velocity & acceleration
+!   M is mass matrix, K stiffness matrix and B boundary term for acoustic/elastic domains
+!   f denotes a source term (acoustic/elastic)
+!   chi_dot_dot is acoustic (fluid) potential ( dotted twice with respect to time)
+!
+! corrector:
+!   updates the velocity term which requires a(t+delta)
+! GPU_MODE: this is handled in 'kernel_3' at the same time as accel*rmass
+  if(.NOT. GPU_MODE) then
+     veloc(:,:) = veloc(:,:) + deltatover2*accel(:,:)
+     ! adjoint simulations
+     if (SIMULATION_TYPE == 3) b_veloc(:,:) = b_veloc(:,:) + b_deltatover2*b_accel(:,:)
+  else ! GPU_MODE == 1
+    if( OCEANS ) call kernel_3_b_cuda(Mesh_pointer, NGLOB_AB, deltatover2,b_deltatover2)
+  endif
+
+
+end subroutine compute_forces_elastic
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! distributes routines according to chosen NGLLX in constants.h
+
+!daniel: note -- i put it here rather than in compute_forces_elastic_Dev.f90 because compiler complains that:
+! " The storage extent of the dummy argument exceeds that of the actual argument. "
+
+subroutine compute_forces_elastic_Dev_sim1(iphase)
+
+! forward simulations
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+
+  implicit none
+
+  integer,intent(in) :: iphase
+
+  select case(NGLLX)
+
+  case (5)
+
+!----------------------------------------------------------------------------------------------
+
+! OpenMP routine flag for testing & benchmarking forward runs only
+! configure additional flag, e.g.: FLAGS_NO_CHECK="-O3 -DOPENMP_MODE -openmp"
+
+!----------------------------------------------------------------------------------------------
+#ifdef OPENMP_MODE
+!! DK DK Jan 2013: beware, that OpenMP version is not maintained / supported and thus probably does not work
+    call compute_forces_elastic_Dev_openmp(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
+           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+           hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+           wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+           kappastore,mustore,jacobian,ibool, &
+           ATTENUATION,deltat, &
+           one_minus_sum_beta,factor_common, &
+           alphaval,betaval,gammaval, &
+           NSPEC_ATTENUATION_AB, &
+           R_xx,R_yy,R_xy,R_xz,R_yz, &
+           epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+           epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+           ANISOTROPY,NSPEC_ANISO, &
+           c11store,c12store,c13store,c14store,c15store,c16store,&
+           c22store,c23store,c24store,c25store,c26store,c33store,&
+           c34store,c35store,c36store,c44store,c45store,c46store,&
+           c55store,c56store,c66store, &
+           SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+           NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+           is_moho_top,is_moho_bot, &
+           dsdx_top,dsdx_bot, &
+           ispec2D_moho_top,ispec2D_moho_bot, &
+           num_phase_ispec_elastic,&
+           phase_ispec_inner_elastic,&
+           num_colors_outer_elastic,num_colors_inner_elastic)
+#else
+    call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB,displ,veloc,accel, &
+             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+             hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+             wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+             kappastore,mustore,jacobian,ibool, &
+             ATTENUATION,deltat, &
+             one_minus_sum_beta,factor_common, &
+             alphaval,betaval,gammaval, &
+             NSPEC_ATTENUATION_AB, &
+             R_xx,R_yy,R_xy,R_xz,R_yz, &
+             epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+             epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+             ANISOTROPY,NSPEC_ANISO, &
+             c11store,c12store,c13store,c14store,c15store,c16store,&
+             c22store,c23store,c24store,c25store,c26store,c33store,&
+             c34store,c35store,c36store,c44store,c45store,c46store,&
+             c55store,c56store,c66store, &
+             SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+             NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+             is_moho_top,is_moho_bot, &
+             dsdx_top,dsdx_bot, &
+             ispec2D_moho_top,ispec2D_moho_bot, &
+             num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+             phase_ispec_inner_elastic )
+#endif
+
+  case default
+
+    stop 'error no Deville routine available for chosen NGLLX'
+
+  end select
+
+end subroutine compute_forces_elastic_Dev_sim1
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+subroutine compute_forces_elastic_Dev_sim3(iphase)
+
+! uses backward/reconstructed displacement and acceleration arrays
+
+  use specfem_par
+  use specfem_par_acoustic
+  use specfem_par_elastic
+  use specfem_par_poroelastic
+
+  implicit none
+
+  integer,intent(in) :: iphase
+
+  select case(NGLLX)
+
+  case (5)
+    call compute_forces_elastic_Dev_5p(iphase, NSPEC_AB,NGLOB_AB, &
+                  b_displ,b_veloc,b_accel, &
+                  xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                  hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+                  wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                  kappastore,mustore,jacobian,ibool, &
+                  ATTENUATION,deltat, &
+                  one_minus_sum_beta,factor_common, &
+                  b_alphaval,b_betaval,b_gammaval, &
+                  NSPEC_ATTENUATION_AB, &
+                  b_R_xx,b_R_yy,b_R_xy,b_R_xz,b_R_yz, &
+                  b_epsilondev_xx,b_epsilondev_yy,b_epsilondev_xy, &
+                  b_epsilondev_xz,b_epsilondev_yz,b_epsilon_trace_over_3, &
+                  ANISOTROPY,NSPEC_ANISO, &
+                  c11store,c12store,c13store,c14store,c15store,c16store,&
+                  c22store,c23store,c24store,c25store,c26store,c33store,&
+                  c34store,c35store,c36store,c44store,c45store,c46store,&
+                  c55store,c56store,c66store, &
+                  SIMULATION_TYPE, COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY,&
+                  NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT,&
+                  is_moho_top,is_moho_bot, &
+                  b_dsdx_top,b_dsdx_bot, &
+                  ispec2D_moho_top,ispec2D_moho_bot, &
+                  num_phase_ispec_elastic,nspec_inner_elastic,nspec_outer_elastic,&
+                  phase_ispec_inner_elastic )
+
+  case default
+
+    stop 'error no Deville routine available for chosen NGLLX'
+
+  end select
+
+
+end subroutine compute_forces_elastic_Dev_sim3
+

Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_fluid.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_fluid.f90	2013-01-16 17:48:05 UTC (rev 21238)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_fluid.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -1,566 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 1
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-!                             July 2012
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-
-  subroutine compute_forces_fluid( iphase, &
-                        NSPEC_AB,NGLOB_AB,displw_poroelastic,accelw_poroelastic,&
-                        velocw_poroelastic,displs_poroelastic,&
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_yy,hprime_zz,&
-                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wxgll,wygll,wzgll,  &
-                        kappaarraystore,rhoarraystore,mustore,etastore,permstore, &
-                        phistore,tortstore,jacobian,ibool,&
-                        epsilonwdev_xx,epsilonwdev_yy,epsilonwdev_xy,&
-                        epsilonwdev_xz,epsilonwdev_yz,epsilonw_trace_over_3, &
-                        SIMULATION_TYPE,NSPEC_ADJOINT, &
-                        num_phase_ispec_poroelastic,nspec_inner_poroelastic,nspec_outer_poroelastic,&
-                        phase_ispec_inner_poroelastic )
-
-! compute forces for the fluid poroelastic part
-
-  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
-                      N_SLS, &
-                      ONE_THIRD,FOUR_THIRDS
-
-  implicit none
-
-  integer :: iphase
-  integer :: NSPEC_AB,NGLOB_AB
-
-! adjoint simulations
-  integer :: SIMULATION_TYPE
-  !integer :: NSPEC_BOUN
-  integer :: NSPEC_ADJOINT
-! adjoint wavefields
-!  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: &
-!    mufr_kl, B_kl
-
-! displacement and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displw_poroelastic,accelw_poroelastic,&
-                                                      velocw_poroelastic
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displs_poroelastic
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: &
-       epsilonwdev_xx,epsilonwdev_yy,epsilonwdev_xy,epsilonwdev_xz,epsilonwdev_yz
-  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilonw_trace_over_3
-
-! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        mustore,etastore,phistore,tortstore,jacobian
-  real(kind=CUSTOM_REAL), dimension(2,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: rhoarraystore
-  real(kind=CUSTOM_REAL), dimension(3,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: kappaarraystore
-  real(kind=CUSTOM_REAL), dimension(6,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: permstore
-
-! Gauss-Lobatto-Legendre points of integration and weights
-  double precision, dimension(NGLLX) :: wxgll
-  double precision, dimension(NGLLY) :: wygll
-  double precision, dimension(NGLLZ) :: wzgll
-
-! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
-  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-!  logical,dimension(NSPEC_AB) :: ispec_is_elastic
-  integer :: num_phase_ispec_poroelastic,nspec_inner_poroelastic,nspec_outer_poroelastic
-  integer, dimension(num_phase_ispec_poroelastic,2) :: phase_ispec_inner_poroelastic
-
-!---
-!--- local variables
-!---
-
-  integer :: ispec,i,j,k,l,iglob,num_elements,ispec_p
-
-! spatial derivatives
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
-    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
-    tempx1p,tempx2p,tempx3p,tempy1p,tempy2p,tempy3p,tempz1p,tempz2p,tempz3p
-!    b_tempx1,b_tempx2,b_tempx3,b_tempy1,b_tempy2,b_tempy3,b_tempz1,b_tempz2,b_tempz3, &
-!    b_tempx1p,b_tempx2p,b_tempx3p,b_tempy1p,b_tempy2p,b_tempy3p,b_tempz1p,b_tempz2p,b_tempz3p
-
-  real(kind=CUSTOM_REAL) :: duxdxl,duydxl,duzdxl,duxdyl,duydyl,duzdyl,duxdzl,duydzl,duzdzl
-  real(kind=CUSTOM_REAL) :: dwxdxl,dwydxl,dwzdxl,dwxdyl,dwydyl,dwzdyl,dwxdzl,dwydzl,dwzdzl
-
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl_plus_duzdzl,dwxdxl_plus_dwydyl_plus_dwzdzl
-
-  real(kind=CUSTOM_REAL) hp1,hp2,hp3
-  real(kind=CUSTOM_REAL) fac1,fac2,fac3
-
-  real(kind=CUSTOM_REAL) tempx1ls,tempx2ls,tempx3ls,tempx1lw,tempx2lw,tempx3lw
-  real(kind=CUSTOM_REAL) tempy1ls,tempy2ls,tempy3ls,tempy1lw,tempy2lw,tempy3lw
-  real(kind=CUSTOM_REAL) tempz1ls,tempz2ls,tempz3ls,tempz1lw,tempz2lw,tempz3lw
-
-!  real(kind=CUSTOM_REAL) :: b_dux_dxl,b_duz_dxl,b_dux_dzl,b_duz_dzl
-!  real(kind=CUSTOM_REAL) :: dsxx,dsxy,dsxz,dsyy,dsyz,dszz
-!  real(kind=CUSTOM_REAL) :: b_dsxx,b_dsxy,b_dsxz,b_dsyy,b_dsyz,b_dszz
-!  real(kind=CUSTOM_REAL) :: b_dwx_dxl,b_dwz_dxl,b_dwx_dzl,b_dwz_dzl
-!  real(kind=CUSTOM_REAL) :: dwxx,dwxy,dwxz,dwyy,dwyz,dwzz
-!  real(kind=CUSTOM_REAL) :: b_dwxx,b_dwxy,b_dwxz,b_dwyy,b_dwyz,b_dwzz
-  real(kind=CUSTOM_REAL) :: sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
-  real(kind=CUSTOM_REAL) :: sigmap
-!  real(kind=CUSTOM_REAL) :: b_sigma_xx,b_sigma_yy,b_sigma_zz,b_sigma_xy,b_sigma_xz,b_sigma_yz
-!  real(kind=CUSTOM_REAL) :: b_sigmap
-!  real(kind=CUSTOM_REAL) :: nx,nz,vx,vz,vn,vxf,vzf,vnf,rho_vpI,rho_vpII,rho_vs,tx,tz,weight,xxi,zxi,xgamma,zgamma,jacobian1D
-
-! viscous attenuation (poroelastic media)
-  real(kind=CUSTOM_REAL), dimension(6) :: bl_relaxed
-
-
-! Jacobian matrix and determinant
-  real(kind=CUSTOM_REAL) ::  xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-
-! material properties of the poroelastic medium
-!  real(kind=CUSTOM_REAL) :: mul_unrelaxed,lambdal_unrelaxed,lambdalplus2mul_unrelaxed
-  real(kind=CUSTOM_REAL) :: kappal_s,rhol_s
-  real(kind=CUSTOM_REAL) :: etal_f,kappal_f,rhol_f
-  real(kind=CUSTOM_REAL) :: mul_fr,kappal_fr,phil,tortl,viscodampx,viscodampy,viscodampz
-  real(kind=CUSTOM_REAL) :: permlxx,permlxy,permlxz,permlyz,permlyy,permlzz,&
-                            invpermlxx,invpermlxy,invpermlxz,invpermlyz,invpermlyy,invpermlzz,detk
-  real(kind=CUSTOM_REAL) :: D_biot,H_biot,C_biot,M_biot,rhol_bar
-
-  real(kind=CUSTOM_REAL) :: mul_G,lambdal_G,lambdalplus2mul_G
-!  real(kind=CUSTOM_REAL) :: cpIsquare,cpIIsquare,cssquare,cpIl,cpIIl,csl
-
-! for attenuation
-!  real(kind=CUSTOM_REAL) :: Un,Unp1,tauinv,Sn,Snp1,theta_n,theta_np1,tauinvsquare,tauinvcube,tauinvUn
-
-! compute Grad(displs_poroelastic) at time step n for attenuation
-!  if(TURN_ATTENUATION_ON) call compute_gradient_attenuation(displs_poroelastic,dux_dxl_n,duz_dxl_n, &
-!      dux_dzl_n,duz_dzl_n,xix,xiz,gammax,gammaz,ibool,poroelastic,hprime_xx,hprime_zz,nspec,npoin)
-
-
-  if( iphase == 1 ) then
-    num_elements = nspec_outer_poroelastic
-  else
-    num_elements = nspec_inner_poroelastic
-  endif
-
-! loop over spectral elements
-  do ispec_p = 1,num_elements
-
-        ispec = phase_ispec_inner_poroelastic(ispec_p,iphase)
-
-! first double loop over GLL points to compute and store gradients
-    do k=1,NGLLZ
-      do j = 1,NGLLY
-        do i = 1,NGLLX
-
-! get poroelastic parameters of current local GLL
-    phil = phistore(i,j,k,ispec)
-    tortl = tortstore(i,j,k,ispec)
-!solid properties
-    kappal_s = kappaarraystore(1,i,j,k,ispec)
-    rhol_s = rhoarraystore(1,i,j,k,ispec)
-!fluid properties
-    kappal_f = kappaarraystore(2,i,j,k,ispec)
-    rhol_f = rhoarraystore(2,i,j,k,ispec)
-!frame properties
-    mul_fr = mustore(i,j,k,ispec)
-    kappal_fr = kappaarraystore(3,i,j,k,ispec)
-    rhol_bar =  (1._CUSTOM_REAL - phil)*rhol_s + phil*rhol_f
-!Biot coefficients for the input phi
-      D_biot = kappal_s*(1._CUSTOM_REAL + phil*(kappal_s/kappal_f - 1._CUSTOM_REAL))
-      H_biot = (kappal_s - kappal_fr)*(kappal_s - kappal_fr)/(D_biot - kappal_fr) + &
-                kappal_fr + 4._CUSTOM_REAL*mul_fr/3._CUSTOM_REAL
-      C_biot = kappal_s*(kappal_s - kappal_fr)/(D_biot - kappal_fr)
-      M_biot = kappal_s*kappal_s/(D_biot - kappal_fr)
-!The RHS has the form : div T -phi/c div T_f + phi/ceta_fk^-1.partial t w
-!where T = G:grad u_s + C_biot div w I
-!and T_f = C_biot div u_s I + M_biot div w I
-      mul_G = mul_fr
-      lambdal_G = H_biot - 2._CUSTOM_REAL*mul_fr
-      lambdalplus2mul_G = lambdal_G + 2._CUSTOM_REAL*mul_G
-
-! derivative along x,y,z for u_s and w
-              tempx1ls = 0.
-              tempx2ls = 0.
-              tempx3ls = 0.
-
-              tempy1ls = 0.
-              tempy2ls = 0.
-              tempy3ls = 0.
-
-              tempz1ls = 0.
-              tempz2ls = 0.
-              tempz3ls = 0.
-
-              tempx1lw = 0.
-              tempx2lw = 0.
-              tempx3lw = 0.
-
-              tempy1lw = 0.
-              tempy2lw = 0.
-              tempy3lw = 0.
-
-              tempz1lw = 0.
-              tempz2lw = 0.
-              tempz3lw = 0.
-
-! first double loop over GLL points to compute and store gradients
-          do l = 1,NGLLX
-                hp1 = hprime_xx(i,l)
-                iglob = ibool(l,j,k,ispec)
-                tempx1ls = tempx1ls + displs_poroelastic(1,iglob)*hp1
-                tempy1ls = tempy1ls + displs_poroelastic(2,iglob)*hp1
-                tempz1ls = tempz1ls + displs_poroelastic(3,iglob)*hp1
-                tempx1lw = tempx1lw + displw_poroelastic(1,iglob)*hp1
-                tempy1lw = tempy1lw + displw_poroelastic(2,iglob)*hp1
-                tempz1lw = tempz1lw + displw_poroelastic(3,iglob)*hp1
-    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
-
-    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
-                hp2 = hprime_yy(j,l)
-                iglob = ibool(i,l,k,ispec)
-                tempx2ls = tempx2ls + displs_poroelastic(1,iglob)*hp2
-                tempy2ls = tempy2ls + displs_poroelastic(2,iglob)*hp2
-                tempz2ls = tempz2ls + displs_poroelastic(3,iglob)*hp2
-                tempx2lw = tempx2lw + displw_poroelastic(1,iglob)*hp2
-                tempy2lw = tempy2lw + displw_poroelastic(2,iglob)*hp2
-                tempz2lw = tempz2lw + displw_poroelastic(3,iglob)*hp2
-    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
-
-    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
-                hp3 = hprime_zz(k,l)
-                iglob = ibool(i,j,l,ispec)
-                tempx3ls = tempx3ls + displs_poroelastic(1,iglob)*hp3
-                tempy3ls = tempy3ls + displs_poroelastic(2,iglob)*hp3
-                tempz3ls = tempz3ls + displs_poroelastic(3,iglob)*hp3
-                tempx3lw = tempx3lw + displw_poroelastic(1,iglob)*hp3
-                tempy3lw = tempy3lw + displw_poroelastic(2,iglob)*hp3
-                tempz3lw = tempz3lw + displw_poroelastic(3,iglob)*hp3
-          enddo
-
-              xixl = xix(i,j,k,ispec)
-              xiyl = xiy(i,j,k,ispec)
-              xizl = xiz(i,j,k,ispec)
-              etaxl = etax(i,j,k,ispec)
-              etayl = etay(i,j,k,ispec)
-              etazl = etaz(i,j,k,ispec)
-              gammaxl = gammax(i,j,k,ispec)
-              gammayl = gammay(i,j,k,ispec)
-              gammazl = gammaz(i,j,k,ispec)
-              jacobianl = jacobian(i,j,k,ispec)
-
-! derivatives of displacement
-              duxdxl = xixl*tempx1ls + etaxl*tempx2ls + gammaxl*tempx3ls
-              duxdyl = xiyl*tempx1ls + etayl*tempx2ls + gammayl*tempx3ls
-              duxdzl = xizl*tempx1ls + etazl*tempx2ls + gammazl*tempx3ls
-
-              duydxl = xixl*tempy1ls + etaxl*tempy2ls + gammaxl*tempy3ls
-              duydyl = xiyl*tempy1ls + etayl*tempy2ls + gammayl*tempy3ls
-              duydzl = xizl*tempy1ls + etazl*tempy2ls + gammazl*tempy3ls
-
-              duzdxl = xixl*tempz1ls + etaxl*tempz2ls + gammaxl*tempz3ls
-              duzdyl = xiyl*tempz1ls + etayl*tempz2ls + gammayl*tempz3ls
-              duzdzl = xizl*tempz1ls + etazl*tempz2ls + gammazl*tempz3ls
-
-              dwxdxl = xixl*tempx1lw + etaxl*tempx2lw + gammaxl*tempx3lw
-              dwxdyl = xiyl*tempx1lw + etayl*tempx2lw + gammayl*tempx3lw
-              dwxdzl = xizl*tempx1lw + etazl*tempx2lw + gammazl*tempx3lw
-
-              dwydxl = xixl*tempy1lw + etaxl*tempy2lw + gammaxl*tempy3lw
-              dwydyl = xiyl*tempy1lw + etayl*tempy2lw + gammayl*tempy3lw
-              dwydzl = xizl*tempy1lw + etazl*tempy2lw + gammazl*tempy3lw
-
-              dwzdxl = xixl*tempz1lw + etaxl*tempz2lw + gammaxl*tempz3lw
-              dwzdyl = xiyl*tempz1lw + etayl*tempz2lw + gammayl*tempz3lw
-              dwzdzl = xizl*tempz1lw + etazl*tempz2lw + gammazl*tempz3lw
-
-    ! precompute some sums to save CPU time
-              duxdxl_plus_duydyl_plus_duzdzl = duxdxl + duydyl + duzdzl
-              dwxdxl_plus_dwydyl_plus_dwzdzl = dwxdxl + dwydyl + dwzdzl
-              duxdxl_plus_duydyl = duxdxl + duydyl
-              duxdxl_plus_duzdzl = duxdxl + duzdzl
-              duydyl_plus_duzdzl = duydyl + duzdzl
-              duxdyl_plus_duydxl = duxdyl + duydxl
-              duzdxl_plus_duxdzl = duzdxl + duxdzl
-              duzdyl_plus_duydzl = duzdyl + duydzl
-
-! compute stress tensor (include attenuation or anisotropy if needed)
-
-!  if(VISCOATTENUATION) then
-!chris:check
-
-! Dissipation only controlled by frame share attenuation in poroelastic (see Morency & Tromp, GJI 2008).
-! attenuation is implemented following the memory variable formulation of
-! J. M. Carcione, Seismic modeling in viscoelastic media, Geophysics,
-! vol. 58(1), p. 110-120 (1993). More details can be found in
-! J. M. Carcione, D. Kosloff and R. Kosloff, Wave propagation simulation in a linear
-! viscoelastic medium, Geophysical Journal International, vol. 95, p. 597-611 (1988).
-
-!  else
-
-! no attenuation
-    sigma_xx = lambdalplus2mul_G*duxdxl + lambdal_G*duydyl_plus_duzdzl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
-    sigma_yy = lambdalplus2mul_G*duydyl + lambdal_G*duxdxl_plus_duzdzl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
-    sigma_zz = lambdalplus2mul_G*duzdzl + lambdal_G*duxdxl_plus_duydyl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
-
-    sigma_xy = mul_G*duxdyl_plus_duydxl
-    sigma_xz = mul_G*duzdxl_plus_duxdzl
-    sigma_yz = mul_G*duzdyl_plus_duydzl
-
-    sigmap = C_biot*duxdxl_plus_duydyl_plus_duzdzl + M_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
-
-          if(SIMULATION_TYPE == 3) then ! kernels calculation
-    epsilonw_trace_over_3(i,j,k,ispec) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-    epsilonwdev_xx(i,j,k,ispec) = duxdxl - ONE_THIRD * (duxdxl + duydyl + duzdzl)
-    epsilonwdev_yy(i,j,k,ispec) = duydyl - ONE_THIRD * (duxdxl + duydyl + duzdzl)
-    epsilonwdev_xy(i,j,k,ispec) = 0.5 * duxdyl_plus_duydxl
-    epsilonwdev_xz(i,j,k,ispec) = 0.5 * duzdxl_plus_duxdzl
-    epsilonwdev_yz(i,j,k,ispec) = 0.5 * duzdyl_plus_duydzl
-          endif
-!  endif !if(VISCOATTENUATION)
-
-! weak formulation term based on stress tensor (non-symmetric form)
-            ! define symmetric components of sigma
-            sigma_yx = sigma_xy
-            sigma_zx = sigma_xz
-            sigma_zy = sigma_yz
-
-            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
-            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
-            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
-            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
-            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
-            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
-            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
-            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
-            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
-            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
-          tempx1p(i,j,k) = jacobianl * sigmap*xixl
-          tempy1p(i,j,k) = jacobianl * sigmap*xiyl
-          tempz1p(i,j,k) = jacobianl * sigmap*xizl
-
-          tempx2p(i,j,k) = jacobianl * sigmap*etaxl
-          tempy2p(i,j,k) = jacobianl * sigmap*etayl
-          tempz2p(i,j,k) = jacobianl * sigmap*etazl
-
-          tempx3p(i,j,k) = jacobianl * sigmap*gammaxl
-          tempy3p(i,j,k) = jacobianl * sigmap*gammayl
-          tempz3p(i,j,k) = jacobianl * sigmap*gammazl
-
-        enddo
-      enddo
-    enddo
-
-!
-! second double-loop over GLL to compute all the terms
-!
-    do k = 1,NGLLZ
-      do j = 1,NGLLY
-        do i = 1,NGLLX
-
-              tempx1ls = 0.
-              tempy1ls = 0.
-              tempz1ls = 0.
-
-              tempx2ls = 0.
-              tempy2ls = 0.
-              tempz2ls = 0.
-
-              tempx3ls = 0.
-              tempy3ls = 0.
-              tempz3ls = 0.
-
-              tempx1lw = 0.
-              tempy1lw = 0.
-              tempz1lw = 0.
-
-              tempx2lw = 0.
-              tempy2lw = 0.
-              tempz2lw = 0.
-
-              tempx3lw = 0.
-              tempy3lw = 0.
-              tempz3lw = 0.
-
-              do l=1,NGLLX
-                fac1 = hprimewgll_xx(l,i)
-                tempx1ls = tempx1ls + tempx1(l,j,k)*fac1
-                tempy1ls = tempy1ls + tempy1(l,j,k)*fac1
-                tempz1ls = tempz1ls + tempz1(l,j,k)*fac1
-                tempx1lw = tempx1lw + tempx1p(l,j,k)*fac1
-                tempy1lw = tempy1lw + tempy1p(l,j,k)*fac1
-                tempz1lw = tempz1lw + tempz1p(l,j,k)*fac1
-                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
-
-                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
-                fac2 = hprimewgll_yy(l,j)
-                tempx2ls = tempx2ls + tempx2(i,l,k)*fac2
-                tempy2ls = tempy2ls + tempy2(i,l,k)*fac2
-                tempz2ls = tempz2ls + tempz2(i,l,k)*fac2
-                tempx2lw = tempx2lw + tempx2p(i,l,k)*fac2
-                tempy2lw = tempy2lw + tempy2p(i,l,k)*fac2
-                tempz2lw = tempz2lw + tempz2p(i,l,k)*fac2
-                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
-
-                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
-                fac3 = hprimewgll_zz(l,k)
-                tempx3ls = tempx3ls + tempx3(i,j,l)*fac3
-                tempy3ls = tempy3ls + tempy3(i,j,l)*fac3
-                tempz3ls = tempz3ls + tempz3(i,j,l)*fac3
-                tempx3lw = tempx3lw + tempx3p(i,j,l)*fac3
-                tempy3lw = tempy3lw + tempy3p(i,j,l)*fac3
-                tempz3lw = tempz3lw + tempz3p(i,j,l)*fac3
-              enddo
-
-              fac1 = wgllwgll_yz(j,k)
-              fac2 = wgllwgll_xz(i,k)
-              fac3 = wgllwgll_xy(i,j)
-
-! get poroelastic parameters of current local GLL
-    phil = phistore(i,j,k,ispec)
-!solid properties
-    rhol_s = rhoarraystore(1,i,j,k,ispec)
-!fluid properties
-    rhol_f = rhoarraystore(2,i,j,k,ispec)
-!frame properties
-    rhol_bar =  (1._CUSTOM_REAL - phil)*rhol_s + phil*rhol_f
-
-    ! sum contributions from each element to the global mesh
-
-              iglob = ibool(i,j,k,ispec)
-
-
-    accelw_poroelastic(1,iglob) = accelw_poroelastic(1,iglob) + ( fac1*(rhol_f/rhol_bar*tempx1ls - tempx1lw) &
-           + fac2*(rhol_f/rhol_bar*tempx2ls - tempx2lw) + fac3*(rhol_f/rhol_bar*tempx3ls - tempx3lw) )
-
-    accelw_poroelastic(2,iglob) = accelw_poroelastic(2,iglob) + ( fac1*(rhol_f/rhol_bar*tempy1ls - tempy1lw) &
-           + fac2*(rhol_f/rhol_bar*tempy2ls - tempy2lw) + fac3*(rhol_f/rhol_bar*tempy3ls - tempy3lw) )
-
-    accelw_poroelastic(3,iglob) = accelw_poroelastic(3,iglob) + ( fac1*(rhol_f/rhol_bar*tempz1ls - tempz1lw) &
-           + fac2*(rhol_f/rhol_bar*tempz2ls - tempz2lw) + fac3*(rhol_f/rhol_bar*tempz3ls - tempz3lw) )
-
-
-!
-!---- viscous damping
-!
-! add + phi/tort eta_f k^-1 dot(w)
-
-    etal_f = etastore(i,j,k,ispec)
-
-      if(etal_f >0.d0) then
-
-    permlxx = permstore(1,i,j,k,ispec)
-    permlxy = permstore(2,i,j,k,ispec)
-    permlxz = permstore(3,i,j,k,ispec)
-    permlyy = permstore(4,i,j,k,ispec)
-    permlyz = permstore(5,i,j,k,ispec)
-    permlzz = permstore(6,i,j,k,ispec)
-
-! calcul of the inverse of k
-    detk = permlxz*(permlxy*permlyz-permlxz*permlyy) &
-         - permlxy*(permlxy*permlzz-permlyz*permlxz) &
-         + permlxx*(permlyy*permlzz-permlyz*permlyz)
-
-    if(detk /= 0.d0) then
-     invpermlxx = (permlyy*permlzz-permlyz*permlyz)/detk
-     invpermlxy = (permlxz*permlyz-permlxy*permlzz)/detk
-     invpermlxz = (permlxy*permlyz-permlxz*permlyy)/detk
-     invpermlyy = (permlxx*permlzz-permlxz*permlxz)/detk
-     invpermlyz = (permlxy*permlxz-permlxx*permlyz)/detk
-     invpermlzz = (permlxx*permlyy-permlxy*permlxy)/detk
-    else
-      stop 'Permeability matrix is not inversible'
-    endif
-
-! relaxed viscous coef
-          bl_relaxed(1) = etal_f*invpermlxx
-          bl_relaxed(2) = etal_f*invpermlxy
-          bl_relaxed(3) = etal_f*invpermlxz
-          bl_relaxed(4) = etal_f*invpermlyy
-          bl_relaxed(5) = etal_f*invpermlyz
-          bl_relaxed(6) = etal_f*invpermlzz
-
-!    if(VISCOATTENUATION) then
-!          bl_unrelaxed(1) = etal_f*invpermlxx*theta_e/theta_s
-!          bl_unrelaxed(2) = etal_f*invpermlxz*theta_e/theta_s
-!          bl_unrelaxed(3) = etal_f*invpermlzz*theta_e/theta_s
-!    endif
-
-!     do k = 1,NGLLZ
-!      do j = 1,NGLLY
-!        do i = 1,NGLLX
-
-!              iglob = ibool(i,j,k,ispec)
-
-!     if(VISCOATTENUATION) then
-! compute the viscous damping term with the unrelaxed viscous coef and add memory variable
-!      viscodampx = velocw_poroelastic(1,iglob)*bl_unrelaxed(1) + velocw_poroelastic(2,iglob)*bl_unrelaxed(2)&
-!                  - rx_viscous(i,j,ispec)
-!      viscodampz = velocw_poroelastic(1,iglob)*bl_unrelaxed(2) + velocw_poroelastic(2,iglob)*bl_unrelaxed(3)&
-!                  - rz_viscous(i,j,ispec)
-!     else
-
-! no viscous attenuation
-      viscodampx = velocw_poroelastic(1,iglob)*bl_relaxed(1) + velocw_poroelastic(2,iglob)*bl_relaxed(2) + &
-                   velocw_poroelastic(3,iglob)*bl_relaxed(3)
-      viscodampy = velocw_poroelastic(1,iglob)*bl_relaxed(2) + velocw_poroelastic(2,iglob)*bl_relaxed(4) + &
-                   velocw_poroelastic(3,iglob)*bl_relaxed(5)
-      viscodampz = velocw_poroelastic(1,iglob)*bl_relaxed(3) + velocw_poroelastic(2,iglob)*bl_relaxed(5) + &
-                   velocw_poroelastic(3,iglob)*bl_relaxed(6)
-!     endif
-
-     accelw_poroelastic(1,iglob) = accelw_poroelastic(1,iglob) - wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
-              viscodampx
-     accelw_poroelastic(2,iglob) = accelw_poroelastic(2,iglob) - wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
-              viscodampy
-     accelw_poroelastic(3,iglob) = accelw_poroelastic(3,iglob) - wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
-              viscodampz
-
-! if isolver == 1 .and. save_forward then b_viscodamp is save in compute_forces_fluid.f90
-!          if(isolver == 2) then ! kernels calculation
-!        b_accels_poroelastic(1,iglob) = b_accels_poroelastic(1,iglob) + phil/tortl*b_viscodampx(iglob)
-!        b_accels_poroelastic(2,iglob) = b_accels_poroelastic(2,iglob) + phil/tortl*b_viscodampz(iglob)
-!          endif
-
-!        enddo
-!      enddo
-!     enddo
-
-         endif ! if(etal_f >0.d0) then
-
-        enddo ! second loop over the GLL points
-      enddo
-    enddo
-
-    enddo ! end of loop over all spectral elements
-
-
-  end subroutine compute_forces_fluid
-

Copied: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_fluid_for_poro.f90 (from rev 21237, seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_fluid.f90)
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_fluid_for_poro.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_fluid_for_poro.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -0,0 +1,566 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 1
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+!                             July 2012
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+  subroutine compute_forces_poro_fluid_part( iphase, &
+                        NSPEC_AB,NGLOB_AB,displw_poroelastic,accelw_poroelastic,&
+                        velocw_poroelastic,displs_poroelastic,&
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_yy,hprime_zz,&
+                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
+                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wxgll,wygll,wzgll,  &
+                        kappaarraystore,rhoarraystore,mustore,etastore,permstore, &
+                        phistore,tortstore,jacobian,ibool,&
+                        epsilonwdev_xx,epsilonwdev_yy,epsilonwdev_xy,&
+                        epsilonwdev_xz,epsilonwdev_yz,epsilonw_trace_over_3, &
+                        SIMULATION_TYPE,NSPEC_ADJOINT, &
+                        num_phase_ispec_poroelastic,nspec_inner_poroelastic,nspec_outer_poroelastic,&
+                        phase_ispec_inner_poroelastic )
+
+! compute forces for the fluid poroelastic part
+
+  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
+                      N_SLS, &
+                      ONE_THIRD,FOUR_THIRDS
+
+  implicit none
+
+  integer :: iphase
+  integer :: NSPEC_AB,NGLOB_AB
+
+! adjoint simulations
+  integer :: SIMULATION_TYPE
+  !integer :: NSPEC_BOUN
+  integer :: NSPEC_ADJOINT
+! adjoint wavefields
+!  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: &
+!    mufr_kl, B_kl
+
+! displacement and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displw_poroelastic,accelw_poroelastic,&
+                                                      velocw_poroelastic
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displs_poroelastic
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: &
+       epsilonwdev_xx,epsilonwdev_yy,epsilonwdev_xy,epsilonwdev_xz,epsilonwdev_yz
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilonw_trace_over_3
+
+! arrays with mesh parameters per slice
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+        mustore,etastore,phistore,tortstore,jacobian
+  real(kind=CUSTOM_REAL), dimension(2,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: rhoarraystore
+  real(kind=CUSTOM_REAL), dimension(3,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: kappaarraystore
+  real(kind=CUSTOM_REAL), dimension(6,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: permstore
+
+! Gauss-Lobatto-Legendre points of integration and weights
+  double precision, dimension(NGLLX) :: wxgll
+  double precision, dimension(NGLLY) :: wygll
+  double precision, dimension(NGLLZ) :: wzgll
+
+! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+!  logical,dimension(NSPEC_AB) :: ispec_is_elastic
+  integer :: num_phase_ispec_poroelastic,nspec_inner_poroelastic,nspec_outer_poroelastic
+  integer, dimension(num_phase_ispec_poroelastic,2) :: phase_ispec_inner_poroelastic
+
+!---
+!--- local variables
+!---
+
+  integer :: ispec,i,j,k,l,iglob,num_elements,ispec_p
+
+! spatial derivatives
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
+    tempx1p,tempx2p,tempx3p,tempy1p,tempy2p,tempy3p,tempz1p,tempz2p,tempz3p
+!    b_tempx1,b_tempx2,b_tempx3,b_tempy1,b_tempy2,b_tempy3,b_tempz1,b_tempz2,b_tempz3, &
+!    b_tempx1p,b_tempx2p,b_tempx3p,b_tempy1p,b_tempy2p,b_tempy3p,b_tempz1p,b_tempz2p,b_tempz3p
+
+  real(kind=CUSTOM_REAL) :: duxdxl,duydxl,duzdxl,duxdyl,duydyl,duzdyl,duxdzl,duydzl,duzdzl
+  real(kind=CUSTOM_REAL) :: dwxdxl,dwydxl,dwzdxl,dwxdyl,dwydyl,dwzdyl,dwxdzl,dwydzl,dwzdzl
+
+  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl_plus_duzdzl,dwxdxl_plus_dwydyl_plus_dwzdzl
+
+  real(kind=CUSTOM_REAL) hp1,hp2,hp3
+  real(kind=CUSTOM_REAL) fac1,fac2,fac3
+
+  real(kind=CUSTOM_REAL) tempx1ls,tempx2ls,tempx3ls,tempx1lw,tempx2lw,tempx3lw
+  real(kind=CUSTOM_REAL) tempy1ls,tempy2ls,tempy3ls,tempy1lw,tempy2lw,tempy3lw
+  real(kind=CUSTOM_REAL) tempz1ls,tempz2ls,tempz3ls,tempz1lw,tempz2lw,tempz3lw
+
+!  real(kind=CUSTOM_REAL) :: b_dux_dxl,b_duz_dxl,b_dux_dzl,b_duz_dzl
+!  real(kind=CUSTOM_REAL) :: dsxx,dsxy,dsxz,dsyy,dsyz,dszz
+!  real(kind=CUSTOM_REAL) :: b_dsxx,b_dsxy,b_dsxz,b_dsyy,b_dsyz,b_dszz
+!  real(kind=CUSTOM_REAL) :: b_dwx_dxl,b_dwz_dxl,b_dwx_dzl,b_dwz_dzl
+!  real(kind=CUSTOM_REAL) :: dwxx,dwxy,dwxz,dwyy,dwyz,dwzz
+!  real(kind=CUSTOM_REAL) :: b_dwxx,b_dwxy,b_dwxz,b_dwyy,b_dwyz,b_dwzz
+  real(kind=CUSTOM_REAL) :: sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
+  real(kind=CUSTOM_REAL) :: sigmap
+!  real(kind=CUSTOM_REAL) :: b_sigma_xx,b_sigma_yy,b_sigma_zz,b_sigma_xy,b_sigma_xz,b_sigma_yz
+!  real(kind=CUSTOM_REAL) :: b_sigmap
+!  real(kind=CUSTOM_REAL) :: nx,nz,vx,vz,vn,vxf,vzf,vnf,rho_vpI,rho_vpII,rho_vs,tx,tz,weight,xxi,zxi,xgamma,zgamma,jacobian1D
+
+! viscous attenuation (poroelastic media)
+  real(kind=CUSTOM_REAL), dimension(6) :: bl_relaxed
+
+
+! Jacobian matrix and determinant
+  real(kind=CUSTOM_REAL) ::  xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+
+! material properties of the poroelastic medium
+!  real(kind=CUSTOM_REAL) :: mul_unrelaxed,lambdal_unrelaxed,lambdalplus2mul_unrelaxed
+  real(kind=CUSTOM_REAL) :: kappal_s,rhol_s
+  real(kind=CUSTOM_REAL) :: etal_f,kappal_f,rhol_f
+  real(kind=CUSTOM_REAL) :: mul_fr,kappal_fr,phil,tortl,viscodampx,viscodampy,viscodampz
+  real(kind=CUSTOM_REAL) :: permlxx,permlxy,permlxz,permlyz,permlyy,permlzz,&
+                            invpermlxx,invpermlxy,invpermlxz,invpermlyz,invpermlyy,invpermlzz,detk
+  real(kind=CUSTOM_REAL) :: D_biot,H_biot,C_biot,M_biot,rhol_bar
+
+  real(kind=CUSTOM_REAL) :: mul_G,lambdal_G,lambdalplus2mul_G
+!  real(kind=CUSTOM_REAL) :: cpIsquare,cpIIsquare,cssquare,cpIl,cpIIl,csl
+
+! for attenuation
+!  real(kind=CUSTOM_REAL) :: Un,Unp1,tauinv,Sn,Snp1,theta_n,theta_np1,tauinvsquare,tauinvcube,tauinvUn
+
+! compute Grad(displs_poroelastic) at time step n for attenuation
+!  if(TURN_ATTENUATION_ON) call compute_gradient_attenuation(displs_poroelastic,dux_dxl_n,duz_dxl_n, &
+!      dux_dzl_n,duz_dzl_n,xix,xiz,gammax,gammaz,ibool,poroelastic,hprime_xx,hprime_zz,nspec,npoin)
+
+
+  if( iphase == 1 ) then
+    num_elements = nspec_outer_poroelastic
+  else
+    num_elements = nspec_inner_poroelastic
+  endif
+
+! loop over spectral elements
+  do ispec_p = 1,num_elements
+
+        ispec = phase_ispec_inner_poroelastic(ispec_p,iphase)
+
+! first double loop over GLL points to compute and store gradients
+    do k=1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+
+! get poroelastic parameters of current local GLL
+    phil = phistore(i,j,k,ispec)
+    tortl = tortstore(i,j,k,ispec)
+!solid properties
+    kappal_s = kappaarraystore(1,i,j,k,ispec)
+    rhol_s = rhoarraystore(1,i,j,k,ispec)
+!fluid properties
+    kappal_f = kappaarraystore(2,i,j,k,ispec)
+    rhol_f = rhoarraystore(2,i,j,k,ispec)
+!frame properties
+    mul_fr = mustore(i,j,k,ispec)
+    kappal_fr = kappaarraystore(3,i,j,k,ispec)
+    rhol_bar =  (1._CUSTOM_REAL - phil)*rhol_s + phil*rhol_f
+!Biot coefficients for the input phi
+      D_biot = kappal_s*(1._CUSTOM_REAL + phil*(kappal_s/kappal_f - 1._CUSTOM_REAL))
+      H_biot = (kappal_s - kappal_fr)*(kappal_s - kappal_fr)/(D_biot - kappal_fr) + &
+                kappal_fr + 4._CUSTOM_REAL*mul_fr/3._CUSTOM_REAL
+      C_biot = kappal_s*(kappal_s - kappal_fr)/(D_biot - kappal_fr)
+      M_biot = kappal_s*kappal_s/(D_biot - kappal_fr)
+!The RHS has the form : div T -phi/c div T_f + phi/ceta_fk^-1.partial t w
+!where T = G:grad u_s + C_biot div w I
+!and T_f = C_biot div u_s I + M_biot div w I
+      mul_G = mul_fr
+      lambdal_G = H_biot - 2._CUSTOM_REAL*mul_fr
+      lambdalplus2mul_G = lambdal_G + 2._CUSTOM_REAL*mul_G
+
+! derivative along x,y,z for u_s and w
+              tempx1ls = 0.
+              tempx2ls = 0.
+              tempx3ls = 0.
+
+              tempy1ls = 0.
+              tempy2ls = 0.
+              tempy3ls = 0.
+
+              tempz1ls = 0.
+              tempz2ls = 0.
+              tempz3ls = 0.
+
+              tempx1lw = 0.
+              tempx2lw = 0.
+              tempx3lw = 0.
+
+              tempy1lw = 0.
+              tempy2lw = 0.
+              tempy3lw = 0.
+
+              tempz1lw = 0.
+              tempz2lw = 0.
+              tempz3lw = 0.
+
+! first double loop over GLL points to compute and store gradients
+          do l = 1,NGLLX
+                hp1 = hprime_xx(i,l)
+                iglob = ibool(l,j,k,ispec)
+                tempx1ls = tempx1ls + displs_poroelastic(1,iglob)*hp1
+                tempy1ls = tempy1ls + displs_poroelastic(2,iglob)*hp1
+                tempz1ls = tempz1ls + displs_poroelastic(3,iglob)*hp1
+                tempx1lw = tempx1lw + displw_poroelastic(1,iglob)*hp1
+                tempy1lw = tempy1lw + displw_poroelastic(2,iglob)*hp1
+                tempz1lw = tempz1lw + displw_poroelastic(3,iglob)*hp1
+    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+                hp2 = hprime_yy(j,l)
+                iglob = ibool(i,l,k,ispec)
+                tempx2ls = tempx2ls + displs_poroelastic(1,iglob)*hp2
+                tempy2ls = tempy2ls + displs_poroelastic(2,iglob)*hp2
+                tempz2ls = tempz2ls + displs_poroelastic(3,iglob)*hp2
+                tempx2lw = tempx2lw + displw_poroelastic(1,iglob)*hp2
+                tempy2lw = tempy2lw + displw_poroelastic(2,iglob)*hp2
+                tempz2lw = tempz2lw + displw_poroelastic(3,iglob)*hp2
+    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+                hp3 = hprime_zz(k,l)
+                iglob = ibool(i,j,l,ispec)
+                tempx3ls = tempx3ls + displs_poroelastic(1,iglob)*hp3
+                tempy3ls = tempy3ls + displs_poroelastic(2,iglob)*hp3
+                tempz3ls = tempz3ls + displs_poroelastic(3,iglob)*hp3
+                tempx3lw = tempx3lw + displw_poroelastic(1,iglob)*hp3
+                tempy3lw = tempy3lw + displw_poroelastic(2,iglob)*hp3
+                tempz3lw = tempz3lw + displw_poroelastic(3,iglob)*hp3
+          enddo
+
+              xixl = xix(i,j,k,ispec)
+              xiyl = xiy(i,j,k,ispec)
+              xizl = xiz(i,j,k,ispec)
+              etaxl = etax(i,j,k,ispec)
+              etayl = etay(i,j,k,ispec)
+              etazl = etaz(i,j,k,ispec)
+              gammaxl = gammax(i,j,k,ispec)
+              gammayl = gammay(i,j,k,ispec)
+              gammazl = gammaz(i,j,k,ispec)
+              jacobianl = jacobian(i,j,k,ispec)
+
+! derivatives of displacement
+              duxdxl = xixl*tempx1ls + etaxl*tempx2ls + gammaxl*tempx3ls
+              duxdyl = xiyl*tempx1ls + etayl*tempx2ls + gammayl*tempx3ls
+              duxdzl = xizl*tempx1ls + etazl*tempx2ls + gammazl*tempx3ls
+
+              duydxl = xixl*tempy1ls + etaxl*tempy2ls + gammaxl*tempy3ls
+              duydyl = xiyl*tempy1ls + etayl*tempy2ls + gammayl*tempy3ls
+              duydzl = xizl*tempy1ls + etazl*tempy2ls + gammazl*tempy3ls
+
+              duzdxl = xixl*tempz1ls + etaxl*tempz2ls + gammaxl*tempz3ls
+              duzdyl = xiyl*tempz1ls + etayl*tempz2ls + gammayl*tempz3ls
+              duzdzl = xizl*tempz1ls + etazl*tempz2ls + gammazl*tempz3ls
+
+              dwxdxl = xixl*tempx1lw + etaxl*tempx2lw + gammaxl*tempx3lw
+              dwxdyl = xiyl*tempx1lw + etayl*tempx2lw + gammayl*tempx3lw
+              dwxdzl = xizl*tempx1lw + etazl*tempx2lw + gammazl*tempx3lw
+
+              dwydxl = xixl*tempy1lw + etaxl*tempy2lw + gammaxl*tempy3lw
+              dwydyl = xiyl*tempy1lw + etayl*tempy2lw + gammayl*tempy3lw
+              dwydzl = xizl*tempy1lw + etazl*tempy2lw + gammazl*tempy3lw
+
+              dwzdxl = xixl*tempz1lw + etaxl*tempz2lw + gammaxl*tempz3lw
+              dwzdyl = xiyl*tempz1lw + etayl*tempz2lw + gammayl*tempz3lw
+              dwzdzl = xizl*tempz1lw + etazl*tempz2lw + gammazl*tempz3lw
+
+    ! precompute some sums to save CPU time
+              duxdxl_plus_duydyl_plus_duzdzl = duxdxl + duydyl + duzdzl
+              dwxdxl_plus_dwydyl_plus_dwzdzl = dwxdxl + dwydyl + dwzdzl
+              duxdxl_plus_duydyl = duxdxl + duydyl
+              duxdxl_plus_duzdzl = duxdxl + duzdzl
+              duydyl_plus_duzdzl = duydyl + duzdzl
+              duxdyl_plus_duydxl = duxdyl + duydxl
+              duzdxl_plus_duxdzl = duzdxl + duxdzl
+              duzdyl_plus_duydzl = duzdyl + duydzl
+
+! compute stress tensor (include attenuation or anisotropy if needed)
+
+!  if(VISCOATTENUATION) then
+!chris:check
+
+! Dissipation only controlled by frame share attenuation in poroelastic (see Morency & Tromp, GJI 2008).
+! attenuation is implemented following the memory variable formulation of
+! J. M. Carcione, Seismic modeling in viscoelastic media, Geophysics,
+! vol. 58(1), p. 110-120 (1993). More details can be found in
+! J. M. Carcione, D. Kosloff and R. Kosloff, Wave propagation simulation in a linear
+! viscoelastic medium, Geophysical Journal International, vol. 95, p. 597-611 (1988).
+
+!  else
+
+! no attenuation
+    sigma_xx = lambdalplus2mul_G*duxdxl + lambdal_G*duydyl_plus_duzdzl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
+    sigma_yy = lambdalplus2mul_G*duydyl + lambdal_G*duxdxl_plus_duzdzl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
+    sigma_zz = lambdalplus2mul_G*duzdzl + lambdal_G*duxdxl_plus_duydyl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
+
+    sigma_xy = mul_G*duxdyl_plus_duydxl
+    sigma_xz = mul_G*duzdxl_plus_duxdzl
+    sigma_yz = mul_G*duzdyl_plus_duydzl
+
+    sigmap = C_biot*duxdxl_plus_duydyl_plus_duzdzl + M_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
+
+          if(SIMULATION_TYPE == 3) then ! kernels calculation
+    epsilonw_trace_over_3(i,j,k,ispec) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+    epsilonwdev_xx(i,j,k,ispec) = duxdxl - ONE_THIRD * (duxdxl + duydyl + duzdzl)
+    epsilonwdev_yy(i,j,k,ispec) = duydyl - ONE_THIRD * (duxdxl + duydyl + duzdzl)
+    epsilonwdev_xy(i,j,k,ispec) = 0.5 * duxdyl_plus_duydxl
+    epsilonwdev_xz(i,j,k,ispec) = 0.5 * duzdxl_plus_duxdzl
+    epsilonwdev_yz(i,j,k,ispec) = 0.5 * duzdyl_plus_duydzl
+          endif
+!  endif !if(VISCOATTENUATION)
+
+! weak formulation term based on stress tensor (non-symmetric form)
+            ! define symmetric components of sigma
+            sigma_yx = sigma_xy
+            sigma_zx = sigma_xz
+            sigma_zy = sigma_yz
+
+            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
+            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
+            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
+            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
+
+            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
+            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
+            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
+
+            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
+            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
+            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
+
+          tempx1p(i,j,k) = jacobianl * sigmap*xixl
+          tempy1p(i,j,k) = jacobianl * sigmap*xiyl
+          tempz1p(i,j,k) = jacobianl * sigmap*xizl
+
+          tempx2p(i,j,k) = jacobianl * sigmap*etaxl
+          tempy2p(i,j,k) = jacobianl * sigmap*etayl
+          tempz2p(i,j,k) = jacobianl * sigmap*etazl
+
+          tempx3p(i,j,k) = jacobianl * sigmap*gammaxl
+          tempy3p(i,j,k) = jacobianl * sigmap*gammayl
+          tempz3p(i,j,k) = jacobianl * sigmap*gammazl
+
+        enddo
+      enddo
+    enddo
+
+!
+! second double-loop over GLL to compute all the terms
+!
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+
+              tempx1ls = 0.
+              tempy1ls = 0.
+              tempz1ls = 0.
+
+              tempx2ls = 0.
+              tempy2ls = 0.
+              tempz2ls = 0.
+
+              tempx3ls = 0.
+              tempy3ls = 0.
+              tempz3ls = 0.
+
+              tempx1lw = 0.
+              tempy1lw = 0.
+              tempz1lw = 0.
+
+              tempx2lw = 0.
+              tempy2lw = 0.
+              tempz2lw = 0.
+
+              tempx3lw = 0.
+              tempy3lw = 0.
+              tempz3lw = 0.
+
+              do l=1,NGLLX
+                fac1 = hprimewgll_xx(l,i)
+                tempx1ls = tempx1ls + tempx1(l,j,k)*fac1
+                tempy1ls = tempy1ls + tempy1(l,j,k)*fac1
+                tempz1ls = tempz1ls + tempz1(l,j,k)*fac1
+                tempx1lw = tempx1lw + tempx1p(l,j,k)*fac1
+                tempy1lw = tempy1lw + tempy1p(l,j,k)*fac1
+                tempz1lw = tempz1lw + tempz1p(l,j,k)*fac1
+                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+                fac2 = hprimewgll_yy(l,j)
+                tempx2ls = tempx2ls + tempx2(i,l,k)*fac2
+                tempy2ls = tempy2ls + tempy2(i,l,k)*fac2
+                tempz2ls = tempz2ls + tempz2(i,l,k)*fac2
+                tempx2lw = tempx2lw + tempx2p(i,l,k)*fac2
+                tempy2lw = tempy2lw + tempy2p(i,l,k)*fac2
+                tempz2lw = tempz2lw + tempz2p(i,l,k)*fac2
+                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+                fac3 = hprimewgll_zz(l,k)
+                tempx3ls = tempx3ls + tempx3(i,j,l)*fac3
+                tempy3ls = tempy3ls + tempy3(i,j,l)*fac3
+                tempz3ls = tempz3ls + tempz3(i,j,l)*fac3
+                tempx3lw = tempx3lw + tempx3p(i,j,l)*fac3
+                tempy3lw = tempy3lw + tempy3p(i,j,l)*fac3
+                tempz3lw = tempz3lw + tempz3p(i,j,l)*fac3
+              enddo
+
+              fac1 = wgllwgll_yz(j,k)
+              fac2 = wgllwgll_xz(i,k)
+              fac3 = wgllwgll_xy(i,j)
+
+! get poroelastic parameters of current local GLL
+    phil = phistore(i,j,k,ispec)
+!solid properties
+    rhol_s = rhoarraystore(1,i,j,k,ispec)
+!fluid properties
+    rhol_f = rhoarraystore(2,i,j,k,ispec)
+!frame properties
+    rhol_bar =  (1._CUSTOM_REAL - phil)*rhol_s + phil*rhol_f
+
+    ! sum contributions from each element to the global mesh
+
+              iglob = ibool(i,j,k,ispec)
+
+
+    accelw_poroelastic(1,iglob) = accelw_poroelastic(1,iglob) + ( fac1*(rhol_f/rhol_bar*tempx1ls - tempx1lw) &
+           + fac2*(rhol_f/rhol_bar*tempx2ls - tempx2lw) + fac3*(rhol_f/rhol_bar*tempx3ls - tempx3lw) )
+
+    accelw_poroelastic(2,iglob) = accelw_poroelastic(2,iglob) + ( fac1*(rhol_f/rhol_bar*tempy1ls - tempy1lw) &
+           + fac2*(rhol_f/rhol_bar*tempy2ls - tempy2lw) + fac3*(rhol_f/rhol_bar*tempy3ls - tempy3lw) )
+
+    accelw_poroelastic(3,iglob) = accelw_poroelastic(3,iglob) + ( fac1*(rhol_f/rhol_bar*tempz1ls - tempz1lw) &
+           + fac2*(rhol_f/rhol_bar*tempz2ls - tempz2lw) + fac3*(rhol_f/rhol_bar*tempz3ls - tempz3lw) )
+
+
+!
+!---- viscous damping
+!
+! add + phi/tort eta_f k^-1 dot(w)
+
+    etal_f = etastore(i,j,k,ispec)
+
+      if(etal_f >0.d0) then
+
+    permlxx = permstore(1,i,j,k,ispec)
+    permlxy = permstore(2,i,j,k,ispec)
+    permlxz = permstore(3,i,j,k,ispec)
+    permlyy = permstore(4,i,j,k,ispec)
+    permlyz = permstore(5,i,j,k,ispec)
+    permlzz = permstore(6,i,j,k,ispec)
+
+! calcul of the inverse of k
+    detk = permlxz*(permlxy*permlyz-permlxz*permlyy) &
+         - permlxy*(permlxy*permlzz-permlyz*permlxz) &
+         + permlxx*(permlyy*permlzz-permlyz*permlyz)
+
+    if(detk /= 0.d0) then
+     invpermlxx = (permlyy*permlzz-permlyz*permlyz)/detk
+     invpermlxy = (permlxz*permlyz-permlxy*permlzz)/detk
+     invpermlxz = (permlxy*permlyz-permlxz*permlyy)/detk
+     invpermlyy = (permlxx*permlzz-permlxz*permlxz)/detk
+     invpermlyz = (permlxy*permlxz-permlxx*permlyz)/detk
+     invpermlzz = (permlxx*permlyy-permlxy*permlxy)/detk
+    else
+      stop 'Permeability matrix is not inversible'
+    endif
+
+! relaxed viscous coef
+          bl_relaxed(1) = etal_f*invpermlxx
+          bl_relaxed(2) = etal_f*invpermlxy
+          bl_relaxed(3) = etal_f*invpermlxz
+          bl_relaxed(4) = etal_f*invpermlyy
+          bl_relaxed(5) = etal_f*invpermlyz
+          bl_relaxed(6) = etal_f*invpermlzz
+
+!    if(VISCOATTENUATION) then
+!          bl_unrelaxed(1) = etal_f*invpermlxx*theta_e/theta_s
+!          bl_unrelaxed(2) = etal_f*invpermlxz*theta_e/theta_s
+!          bl_unrelaxed(3) = etal_f*invpermlzz*theta_e/theta_s
+!    endif
+
+!     do k = 1,NGLLZ
+!      do j = 1,NGLLY
+!        do i = 1,NGLLX
+
+!              iglob = ibool(i,j,k,ispec)
+
+!     if(VISCOATTENUATION) then
+! compute the viscous damping term with the unrelaxed viscous coef and add memory variable
+!      viscodampx = velocw_poroelastic(1,iglob)*bl_unrelaxed(1) + velocw_poroelastic(2,iglob)*bl_unrelaxed(2)&
+!                  - rx_viscous(i,j,ispec)
+!      viscodampz = velocw_poroelastic(1,iglob)*bl_unrelaxed(2) + velocw_poroelastic(2,iglob)*bl_unrelaxed(3)&
+!                  - rz_viscous(i,j,ispec)
+!     else
+
+! no viscous attenuation
+      viscodampx = velocw_poroelastic(1,iglob)*bl_relaxed(1) + velocw_poroelastic(2,iglob)*bl_relaxed(2) + &
+                   velocw_poroelastic(3,iglob)*bl_relaxed(3)
+      viscodampy = velocw_poroelastic(1,iglob)*bl_relaxed(2) + velocw_poroelastic(2,iglob)*bl_relaxed(4) + &
+                   velocw_poroelastic(3,iglob)*bl_relaxed(5)
+      viscodampz = velocw_poroelastic(1,iglob)*bl_relaxed(3) + velocw_poroelastic(2,iglob)*bl_relaxed(5) + &
+                   velocw_poroelastic(3,iglob)*bl_relaxed(6)
+!     endif
+
+     accelw_poroelastic(1,iglob) = accelw_poroelastic(1,iglob) - wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
+              viscodampx
+     accelw_poroelastic(2,iglob) = accelw_poroelastic(2,iglob) - wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
+              viscodampy
+     accelw_poroelastic(3,iglob) = accelw_poroelastic(3,iglob) - wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
+              viscodampz
+
+! if isolver == 1 .and. save_forward then b_viscodamp is saved in compute_forces_poro_fluid_part.f90
+!          if(isolver == 2) then ! kernels calculation
+!        b_accels_poroelastic(1,iglob) = b_accels_poroelastic(1,iglob) + phil/tortl*b_viscodampx(iglob)
+!        b_accels_poroelastic(2,iglob) = b_accels_poroelastic(2,iglob) + phil/tortl*b_viscodampz(iglob)
+!          endif
+
+!        enddo
+!      enddo
+!     enddo
+
+         endif ! if(etal_f >0.d0) then
+
+        enddo ! second loop over the GLL points
+      enddo
+    enddo
+
+    enddo ! end of loop over all spectral elements
+
+
+  end subroutine compute_forces_poro_fluid_part
+

Copied: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_poro_solid_part.f90 (from rev 21237, seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_solid.f90)
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_poro_solid_part.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_poro_solid_part.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -0,0 +1,560 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 1
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+!                             July 2012
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+  subroutine compute_forces_poro_solid_part( iphase, &
+                        NSPEC_AB,NGLOB_AB,displs_poroelastic,accels_poroelastic,&
+                        displw_poroelastic,velocw_poroelastic,&
+                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                        hprime_xx,hprime_yy,hprime_zz,&
+                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
+                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wxgll,wygll,wzgll,  &
+                        kappaarraystore,rhoarraystore,mustore,etastore,permstore, &
+                        phistore,tortstore,jacobian,ibool,&
+                        epsilonsdev_xx,epsilonsdev_yy,epsilonsdev_xy,&
+                        epsilonsdev_xz,epsilonsdev_yz,epsilons_trace_over_3, &
+                        SIMULATION_TYPE,NSPEC_ADJOINT, &
+                        num_phase_ispec_poroelastic,nspec_inner_poroelastic,nspec_outer_poroelastic,&
+                        phase_ispec_inner_poroelastic )
+
+! compute forces for the solid poroelastic part
+
+  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
+                      N_SLS, &
+                      ONE_THIRD,FOUR_THIRDS
+
+  implicit none
+
+  integer :: iphase
+  integer :: NSPEC_AB,NGLOB_AB
+
+! adjoint simulations
+  integer :: SIMULATION_TYPE
+  !integer :: NSPEC_BOUN
+  integer :: NSPEC_ADJOINT
+! adjoint wavefields
+!  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: &
+!    mufr_kl, B_kl
+
+! displacement and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displs_poroelastic,accels_poroelastic
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displw_poroelastic,velocw_poroelastic
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: &
+       epsilonsdev_xx,epsilonsdev_yy,epsilonsdev_xy,epsilonsdev_xz,epsilonsdev_yz
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilons_trace_over_3
+
+! arrays with mesh parameters per slice
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+        mustore,etastore,phistore,tortstore,jacobian
+  real(kind=CUSTOM_REAL), dimension(2,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: rhoarraystore
+  real(kind=CUSTOM_REAL), dimension(3,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: kappaarraystore
+  real(kind=CUSTOM_REAL), dimension(6,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: permstore
+
+! Gauss-Lobatto-Legendre points of integration and weights
+  double precision, dimension(NGLLX) :: wxgll
+  double precision, dimension(NGLLY) :: wygll
+  double precision, dimension(NGLLZ) :: wzgll
+
+! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+!  logical,dimension(NSPEC_AB) :: ispec_is_elastic
+  integer :: num_phase_ispec_poroelastic,nspec_inner_poroelastic,nspec_outer_poroelastic
+  integer, dimension(num_phase_ispec_poroelastic,2) :: phase_ispec_inner_poroelastic
+
+! local parameters
+
+  integer :: ispec,i,j,k,l,iglob,num_elements,ispec_p
+
+! spatial derivatives
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
+    tempx1p,tempx2p,tempx3p,tempy1p,tempy2p,tempy3p,tempz1p,tempz2p,tempz3p
+!    b_tempx1,b_tempx2,b_tempx3,b_tempy1,b_tempy2,b_tempy3,b_tempz1,b_tempz2,b_tempz3, &
+!    b_tempx1p,b_tempx2p,b_tempx3p,b_tempy1p,b_tempy2p,b_tempy3p,b_tempz1p,b_tempz2p,b_tempz3p
+
+  real(kind=CUSTOM_REAL) :: duxdxl,duydxl,duzdxl,duxdyl,duydyl,duzdyl,duxdzl,duydzl,duzdzl
+  real(kind=CUSTOM_REAL) :: dwxdxl,dwydxl,dwzdxl,dwxdyl,dwydyl,dwzdyl,dwxdzl,dwydzl,dwzdzl
+
+  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl_plus_duzdzl,dwxdxl_plus_dwydyl_plus_dwzdzl
+
+  real(kind=CUSTOM_REAL) hp1,hp2,hp3
+  real(kind=CUSTOM_REAL) fac1,fac2,fac3
+
+  real(kind=CUSTOM_REAL) tempx1ls,tempx2ls,tempx3ls,tempx1lw,tempx2lw,tempx3lw
+  real(kind=CUSTOM_REAL) tempy1ls,tempy2ls,tempy3ls,tempy1lw,tempy2lw,tempy3lw
+  real(kind=CUSTOM_REAL) tempz1ls,tempz2ls,tempz3ls,tempz1lw,tempz2lw,tempz3lw
+
+!  real(kind=CUSTOM_REAL) :: b_dux_dxl,b_duz_dxl,b_dux_dzl,b_duz_dzl
+!  real(kind=CUSTOM_REAL) :: dsxx,dsxy,dsxz,dsyy,dsyz,dszz
+!  real(kind=CUSTOM_REAL) :: b_dsxx,b_dsxy,b_dsxz,b_dsyy,b_dsyz,b_dszz
+!  real(kind=CUSTOM_REAL) :: b_dwx_dxl,b_dwz_dxl,b_dwx_dzl,b_dwz_dzl
+!  real(kind=CUSTOM_REAL) :: dwxx,dwxy,dwxz,dwyy,dwyz,dwzz
+!  real(kind=CUSTOM_REAL) :: b_dwxx,b_dwxy,b_dwxz,b_dwyy,b_dwyz,b_dwzz
+  real(kind=CUSTOM_REAL) :: sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
+  real(kind=CUSTOM_REAL) :: sigmap
+!  real(kind=CUSTOM_REAL) :: b_sigma_xx,b_sigma_yy,b_sigma_zz,b_sigma_xy,b_sigma_xz,b_sigma_yz
+!  real(kind=CUSTOM_REAL) :: b_sigmap
+!  real(kind=CUSTOM_REAL) :: nx,nz,vx,vz,vn,vxf,vzf,vnf,rho_vpI,rho_vpII,rho_vs,tx,tz,weight,xxi,zxi,xgamma,zgamma,jacobian1D
+
+! viscous attenuation (poroelastic media)
+  real(kind=CUSTOM_REAL), dimension(6) :: bl_relaxed
+
+
+! Jacobian matrix and determinant
+  real(kind=CUSTOM_REAL) ::  xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+
+! material properties of the poroelastic medium
+!  real(kind=CUSTOM_REAL) :: mul_unrelaxed,lambdal_unrelaxed,lambdalplus2mul_unrelaxed
+  real(kind=CUSTOM_REAL) :: kappal_s,rhol_s
+  real(kind=CUSTOM_REAL) :: etal_f,kappal_f,rhol_f
+  real(kind=CUSTOM_REAL) :: mul_fr,kappal_fr,phil,tortl,viscodampx,viscodampy,viscodampz
+  real(kind=CUSTOM_REAL) :: permlxx,permlxy,permlxz,permlyz,permlyy,permlzz,&
+                            invpermlxx,invpermlxy,invpermlxz,invpermlyz,invpermlyy,invpermlzz,detk
+  real(kind=CUSTOM_REAL) :: D_biot,H_biot,C_biot,M_biot,rhol_bar
+
+  real(kind=CUSTOM_REAL) :: mul_G,lambdal_G,lambdalplus2mul_G
+!  real(kind=CUSTOM_REAL) :: cpIsquare,cpIIsquare,cssquare,cpIl,cpIIl,csl
+
+! for attenuation
+!  real(kind=CUSTOM_REAL) :: Un,Unp1,tauinv,Sn,Snp1,theta_n,theta_np1,tauinvsquare,tauinvcube,tauinvUn
+
+
+! compute Grad(displs_poroelastic) at time step n for attenuation
+!  if(TURN_ATTENUATION_ON) call compute_gradient_attenuation(displs_poroelastic,dux_dxl_n,duz_dxl_n, &
+!      dux_dzl_n,duz_dzl_n,xix,xiz,gammax,gammaz,ibool,poroelastic,hprime_xx,hprime_zz,nspec,npoin)
+
+
+  if( iphase == 1 ) then
+    num_elements = nspec_outer_poroelastic
+  else
+    num_elements = nspec_inner_poroelastic
+  endif
+
+! loop over spectral elements
+  do ispec_p = 1,num_elements
+
+        ispec = phase_ispec_inner_poroelastic(ispec_p,iphase)
+
+! first double loop over GLL points to compute and store gradients
+    do k=1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+
+! get poroelastic parameters of current local GLL
+    phil = phistore(i,j,k,ispec)
+    tortl = tortstore(i,j,k,ispec)
+!solid properties
+    kappal_s = kappaarraystore(1,i,j,k,ispec)
+    rhol_s = rhoarraystore(1,i,j,k,ispec)
+!fluid properties
+    kappal_f = kappaarraystore(2,i,j,k,ispec)
+    rhol_f = rhoarraystore(2,i,j,k,ispec)
+!frame properties
+    mul_fr = mustore(i,j,k,ispec)
+    kappal_fr = kappaarraystore(3,i,j,k,ispec)
+    rhol_bar =  (1._CUSTOM_REAL - phil)*rhol_s + phil*rhol_f
+!Biot coefficients for the input phi
+      D_biot = kappal_s*(1._CUSTOM_REAL + phil*(kappal_s/kappal_f - 1._CUSTOM_REAL))
+      H_biot = (kappal_s - kappal_fr)*(kappal_s - kappal_fr)/(D_biot - kappal_fr) + &
+                kappal_fr + 4._CUSTOM_REAL*mul_fr/3._CUSTOM_REAL
+      C_biot = kappal_s*(kappal_s - kappal_fr)/(D_biot - kappal_fr)
+      M_biot = kappal_s*kappal_s/(D_biot - kappal_fr)
+!The RHS has the form : div T -phi/c div T_f + phi/ceta_fk^-1.partial t w
+!where T = G:grad u_s + C_biot div w I
+!and T_f = C_biot div u_s I + M_biot div w I
+      mul_G = mul_fr
+      lambdal_G = H_biot - 2._CUSTOM_REAL*mul_fr
+      lambdalplus2mul_G = lambdal_G + 2._CUSTOM_REAL*mul_G
+
+! derivative along x,y,z for u_s and w
+              tempx1ls = 0.
+              tempx2ls = 0.
+              tempx3ls = 0.
+
+              tempy1ls = 0.
+              tempy2ls = 0.
+              tempy3ls = 0.
+
+              tempz1ls = 0.
+              tempz2ls = 0.
+              tempz3ls = 0.
+
+              tempx1lw = 0.
+              tempx2lw = 0.
+              tempx3lw = 0.
+
+              tempy1lw = 0.
+              tempy2lw = 0.
+              tempy3lw = 0.
+
+              tempz1lw = 0.
+              tempz2lw = 0.
+              tempz3lw = 0.
+
+! first double loop over GLL points to compute and store gradients
+          do l = 1,NGLLX
+                hp1 = hprime_xx(i,l)
+                iglob = ibool(l,j,k,ispec)
+                tempx1ls = tempx1ls + displs_poroelastic(1,iglob)*hp1
+                tempy1ls = tempy1ls + displs_poroelastic(2,iglob)*hp1
+                tempz1ls = tempz1ls + displs_poroelastic(3,iglob)*hp1
+                tempx1lw = tempx1lw + displw_poroelastic(1,iglob)*hp1
+                tempy1lw = tempy1lw + displw_poroelastic(2,iglob)*hp1
+                tempz1lw = tempz1lw + displw_poroelastic(3,iglob)*hp1
+    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+                hp2 = hprime_yy(j,l)
+                iglob = ibool(i,l,k,ispec)
+                tempx2ls = tempx2ls + displs_poroelastic(1,iglob)*hp2
+                tempy2ls = tempy2ls + displs_poroelastic(2,iglob)*hp2
+                tempz2ls = tempz2ls + displs_poroelastic(3,iglob)*hp2
+                tempx2lw = tempx2lw + displw_poroelastic(1,iglob)*hp2
+                tempy2lw = tempy2lw + displw_poroelastic(2,iglob)*hp2
+                tempz2lw = tempz2lw + displw_poroelastic(3,iglob)*hp2
+    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+                hp3 = hprime_zz(k,l)
+                iglob = ibool(i,j,l,ispec)
+                tempx3ls = tempx3ls + displs_poroelastic(1,iglob)*hp3
+                tempy3ls = tempy3ls + displs_poroelastic(2,iglob)*hp3
+                tempz3ls = tempz3ls + displs_poroelastic(3,iglob)*hp3
+                tempx3lw = tempx3lw + displw_poroelastic(1,iglob)*hp3
+                tempy3lw = tempy3lw + displw_poroelastic(2,iglob)*hp3
+                tempz3lw = tempz3lw + displw_poroelastic(3,iglob)*hp3
+          enddo
+
+              xixl = xix(i,j,k,ispec)
+              xiyl = xiy(i,j,k,ispec)
+              xizl = xiz(i,j,k,ispec)
+              etaxl = etax(i,j,k,ispec)
+              etayl = etay(i,j,k,ispec)
+              etazl = etaz(i,j,k,ispec)
+              gammaxl = gammax(i,j,k,ispec)
+              gammayl = gammay(i,j,k,ispec)
+              gammazl = gammaz(i,j,k,ispec)
+              jacobianl = jacobian(i,j,k,ispec)
+
+! derivatives of displacement
+              duxdxl = xixl*tempx1ls + etaxl*tempx2ls + gammaxl*tempx3ls
+              duxdyl = xiyl*tempx1ls + etayl*tempx2ls + gammayl*tempx3ls
+              duxdzl = xizl*tempx1ls + etazl*tempx2ls + gammazl*tempx3ls
+
+              duydxl = xixl*tempy1ls + etaxl*tempy2ls + gammaxl*tempy3ls
+              duydyl = xiyl*tempy1ls + etayl*tempy2ls + gammayl*tempy3ls
+              duydzl = xizl*tempy1ls + etazl*tempy2ls + gammazl*tempy3ls
+
+              duzdxl = xixl*tempz1ls + etaxl*tempz2ls + gammaxl*tempz3ls
+              duzdyl = xiyl*tempz1ls + etayl*tempz2ls + gammayl*tempz3ls
+              duzdzl = xizl*tempz1ls + etazl*tempz2ls + gammazl*tempz3ls
+
+              dwxdxl = xixl*tempx1lw + etaxl*tempx2lw + gammaxl*tempx3lw
+              dwxdyl = xiyl*tempx1lw + etayl*tempx2lw + gammayl*tempx3lw
+              dwxdzl = xizl*tempx1lw + etazl*tempx2lw + gammazl*tempx3lw
+
+              dwydxl = xixl*tempy1lw + etaxl*tempy2lw + gammaxl*tempy3lw
+              dwydyl = xiyl*tempy1lw + etayl*tempy2lw + gammayl*tempy3lw
+              dwydzl = xizl*tempy1lw + etazl*tempy2lw + gammazl*tempy3lw
+
+              dwzdxl = xixl*tempz1lw + etaxl*tempz2lw + gammaxl*tempz3lw
+              dwzdyl = xiyl*tempz1lw + etayl*tempz2lw + gammayl*tempz3lw
+              dwzdzl = xizl*tempz1lw + etazl*tempz2lw + gammazl*tempz3lw
+
+    ! precompute some sums to save CPU time
+              duxdxl_plus_duydyl_plus_duzdzl = duxdxl + duydyl + duzdzl
+              dwxdxl_plus_dwydyl_plus_dwzdzl = dwxdxl + dwydyl + dwzdzl
+              duxdxl_plus_duydyl = duxdxl + duydyl
+              duxdxl_plus_duzdzl = duxdxl + duzdzl
+              duydyl_plus_duzdzl = duydyl + duzdzl
+              duxdyl_plus_duydxl = duxdyl + duydxl
+              duzdxl_plus_duxdzl = duzdxl + duxdzl
+              duzdyl_plus_duydzl = duzdyl + duydzl
+
+! compute stress tensor (include attenuation or anisotropy if needed)
+
+!  if(VISCOATTENUATION) then
+!chris:check
+
+! Dissipation only controlled by frame share attenuation in poroelastic (see Morency & Tromp, GJI 2008).
+! attenuation is implemented following the memory variable formulation of
+! J. M. Carcione, Seismic modeling in viscoelastic media, Geophysics,
+! vol. 58(1), p. 110-120 (1993). More details can be found in
+! J. M. Carcione, D. Kosloff and R. Kosloff, Wave propagation simulation in a linear
+! viscoelastic medium, Geophysical Journal International, vol. 95, p. 597-611 (1988).
+
+!  else
+
+! no attenuation
+    sigma_xx = lambdalplus2mul_G*duxdxl + lambdal_G*duydyl_plus_duzdzl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
+    sigma_yy = lambdalplus2mul_G*duydyl + lambdal_G*duxdxl_plus_duzdzl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
+    sigma_zz = lambdalplus2mul_G*duzdzl + lambdal_G*duxdxl_plus_duydyl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
+
+    sigma_xy = mul_G*duxdyl_plus_duydxl
+    sigma_xz = mul_G*duzdxl_plus_duxdzl
+    sigma_yz = mul_G*duzdyl_plus_duydzl
+
+    sigmap = C_biot*duxdxl_plus_duydyl_plus_duzdzl + M_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
+
+!  endif !if(ATTENUATION)
+
+          if(SIMULATION_TYPE == 3) then ! kernels calculation
+    epsilons_trace_over_3(i,j,k,ispec) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+    epsilonsdev_xx(i,j,k,ispec) = duxdxl - ONE_THIRD * (duxdxl + duydyl + duzdzl)
+    epsilonsdev_yy(i,j,k,ispec) = duydyl - ONE_THIRD * (duxdxl + duydyl + duzdzl)
+    epsilonsdev_xy(i,j,k,ispec) = 0.5 * duxdyl_plus_duydxl
+    epsilonsdev_xz(i,j,k,ispec) = 0.5 * duzdxl_plus_duxdzl
+    epsilonsdev_yz(i,j,k,ispec) = 0.5 * duzdyl_plus_duydzl
+          endif
+
+
+! weak formulation term based on stress tensor (non-symmetric form)
+            ! define symmetric components of sigma
+            sigma_yx = sigma_xy
+            sigma_zx = sigma_xz
+            sigma_zy = sigma_yz
+
+            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
+            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
+            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
+            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
+
+            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
+            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
+            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
+
+            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
+            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
+            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
+
+          tempx1p(i,j,k) = jacobianl * sigmap*xixl
+          tempy1p(i,j,k) = jacobianl * sigmap*xiyl
+          tempz1p(i,j,k) = jacobianl * sigmap*xizl
+
+          tempx2p(i,j,k) = jacobianl * sigmap*etaxl
+          tempy2p(i,j,k) = jacobianl * sigmap*etayl
+          tempz2p(i,j,k) = jacobianl * sigmap*etazl
+
+          tempx3p(i,j,k) = jacobianl * sigmap*gammaxl
+          tempy3p(i,j,k) = jacobianl * sigmap*gammayl
+          tempz3p(i,j,k) = jacobianl * sigmap*gammazl
+
+
+        enddo
+      enddo
+    enddo
+
+!
+! second double-loop over GLL to compute all the terms
+!
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+
+              tempx1ls = 0.
+              tempy1ls = 0.
+              tempz1ls = 0.
+
+              tempx2ls = 0.
+              tempy2ls = 0.
+              tempz2ls = 0.
+
+              tempx3ls = 0.
+              tempy3ls = 0.
+              tempz3ls = 0.
+
+              tempx1lw = 0.
+              tempy1lw = 0.
+              tempz1lw = 0.
+
+              tempx2lw = 0.
+              tempy2lw = 0.
+              tempz2lw = 0.
+
+              tempx3lw = 0.
+              tempy3lw = 0.
+              tempz3lw = 0.
+
+              do l=1,NGLLX
+                fac1 = hprimewgll_xx(l,i)
+                tempx1ls = tempx1ls + tempx1(l,j,k)*fac1
+                tempy1ls = tempy1ls + tempy1(l,j,k)*fac1
+                tempz1ls = tempz1ls + tempz1(l,j,k)*fac1
+                tempx1lw = tempx1lw + tempx1p(l,j,k)*fac1
+                tempy1lw = tempy1lw + tempy1p(l,j,k)*fac1
+                tempz1lw = tempz1lw + tempz1p(l,j,k)*fac1
+                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+                fac2 = hprimewgll_yy(l,j)
+                tempx2ls = tempx2ls + tempx2(i,l,k)*fac2
+                tempy2ls = tempy2ls + tempy2(i,l,k)*fac2
+                tempz2ls = tempz2ls + tempz2(i,l,k)*fac2
+                tempx2lw = tempx2lw + tempx2p(i,l,k)*fac2
+                tempy2lw = tempy2lw + tempy2p(i,l,k)*fac2
+                tempz2lw = tempz2lw + tempz2p(i,l,k)*fac2
+                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+                fac3 = hprimewgll_zz(l,k)
+                tempx3ls = tempx3ls + tempx3(i,j,l)*fac3
+                tempy3ls = tempy3ls + tempy3(i,j,l)*fac3
+                tempz3ls = tempz3ls + tempz3(i,j,l)*fac3
+                tempx3lw = tempx3lw + tempx3p(i,j,l)*fac3
+                tempy3lw = tempy3lw + tempy3p(i,j,l)*fac3
+                tempz3lw = tempz3lw + tempz3p(i,j,l)*fac3
+              enddo
+
+              fac1 = wgllwgll_yz(j,k)
+              fac2 = wgllwgll_xz(i,k)
+              fac3 = wgllwgll_xy(i,j)
+
+              phil = phistore(i,j,k,ispec)
+              tortl = tortstore(i,j,k,ispec)
+
+    ! sum contributions from each element to the global mesh
+
+              iglob = ibool(i,j,k,ispec)
+
+
+    accels_poroelastic(1,iglob) = accels_poroelastic(1,iglob) - ( fac1*(tempx1ls - phil/tortl*tempx1lw) &
+           + fac2*(tempx2ls - phil/tortl*tempx2lw) + fac3*(tempx3ls - phil/tortl*tempx3lw) )
+
+    accels_poroelastic(2,iglob) = accels_poroelastic(2,iglob) - ( fac1*(tempy1ls - phil/tortl*tempy1lw) &
+           + fac2*(tempy2ls - phil/tortl*tempy2lw) + fac3*(tempy3ls - phil/tortl*tempy3lw) )
+
+    accels_poroelastic(3,iglob) = accels_poroelastic(3,iglob) - ( fac1*(tempz1ls - phil/tortl*tempz1lw) &
+           + fac2*(tempz2ls - phil/tortl*tempz2lw) + fac3*(tempz3ls - phil/tortl*tempz3lw) )
+
+!
+!---- viscous damping
+!
+! add + phi/tort eta_f k^-1 dot(w)
+
+    etal_f = etastore(i,j,k,ispec)
+
+      if(etal_f >0.d0) then
+
+    permlxx = permstore(1,i,j,k,ispec)
+    permlxy = permstore(2,i,j,k,ispec)
+    permlxz = permstore(3,i,j,k,ispec)
+    permlyy = permstore(4,i,j,k,ispec)
+    permlyz = permstore(5,i,j,k,ispec)
+    permlzz = permstore(6,i,j,k,ispec)
+
+! calcul of the inverse of k
+    detk = permlxz*(permlxy*permlyz-permlxz*permlyy) &
+         - permlxy*(permlxy*permlzz-permlyz*permlxz) &
+         + permlxx*(permlyy*permlzz-permlyz*permlyz)
+
+    if(detk /= 0.d0) then
+     invpermlxx = (permlyy*permlzz-permlyz*permlyz)/detk
+     invpermlxy = (permlxz*permlyz-permlxy*permlzz)/detk
+     invpermlxz = (permlxy*permlyz-permlxz*permlyy)/detk
+     invpermlyy = (permlxx*permlzz-permlxz*permlxz)/detk
+     invpermlyz = (permlxy*permlxz-permlxx*permlyz)/detk
+     invpermlzz = (permlxx*permlyy-permlxy*permlxy)/detk
+    else
+      stop 'Permeability matrix is not inversible'
+    endif
+
+! relaxed viscous coef
+          bl_relaxed(1) = etal_f*invpermlxx
+          bl_relaxed(2) = etal_f*invpermlxy
+          bl_relaxed(3) = etal_f*invpermlxz
+          bl_relaxed(4) = etal_f*invpermlyy
+          bl_relaxed(5) = etal_f*invpermlyz
+          bl_relaxed(6) = etal_f*invpermlzz
+
+!    if(VISCOATTENUATION) then
+!          bl_unrelaxed(1) = etal_f*invpermlxx*theta_e/theta_s
+!          bl_unrelaxed(2) = etal_f*invpermlxz*theta_e/theta_s
+!          bl_unrelaxed(3) = etal_f*invpermlzz*theta_e/theta_s
+!    endif
+
+!     do k = 1,NGLLZ
+!      do j = 1,NGLLY
+!        do i = 1,NGLLX
+
+!              iglob = ibool(i,j,k,ispec)
+
+!     if(VISCOATTENUATION) then
+! compute the viscous damping term with the unrelaxed viscous coef and add memory variable
+!      viscodampx = velocw_poroelastic(1,iglob)*bl_unrelaxed(1) + velocw_poroelastic(2,iglob)*bl_unrelaxed(2)&
+!                  - rx_viscous(i,j,ispec)
+!      viscodampz = velocw_poroelastic(1,iglob)*bl_unrelaxed(2) + velocw_poroelastic(2,iglob)*bl_unrelaxed(3)&
+!                  - rz_viscous(i,j,ispec)
+!     else
+
+! no viscous attenuation
+      viscodampx = velocw_poroelastic(1,iglob)*bl_relaxed(1) + velocw_poroelastic(2,iglob)*bl_relaxed(2) + &
+                   velocw_poroelastic(3,iglob)*bl_relaxed(3)
+      viscodampy = velocw_poroelastic(1,iglob)*bl_relaxed(2) + velocw_poroelastic(2,iglob)*bl_relaxed(4) + &
+                   velocw_poroelastic(3,iglob)*bl_relaxed(5)
+      viscodampz = velocw_poroelastic(1,iglob)*bl_relaxed(3) + velocw_poroelastic(2,iglob)*bl_relaxed(5) + &
+                   velocw_poroelastic(3,iglob)*bl_relaxed(6)
+!     endif
+
+     accels_poroelastic(1,iglob) = accels_poroelastic(1,iglob) + phil/tortl*wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
+              viscodampx
+     accels_poroelastic(2,iglob) = accels_poroelastic(2,iglob) + phil/tortl*wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
+              viscodampy
+     accels_poroelastic(3,iglob) = accels_poroelastic(3,iglob) + phil/tortl*wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
+              viscodampz
+
+! if isolver == 1 .and. save_forward then b_viscodamp is saved in compute_forces_poro_fluid_part.f90
+!          if(isolver == 2) then ! kernels calculation
+!        b_accels_poroelastic(1,iglob) = b_accels_poroelastic(1,iglob) + phil/tortl*b_viscodampx(iglob)
+!        b_accels_poroelastic(2,iglob) = b_accels_poroelastic(2,iglob) + phil/tortl*b_viscodampz(iglob)
+!          endif
+
+!        enddo
+!      enddo
+!     enddo
+
+         endif ! if(etal_f >0.d0) then
+
+        enddo ! second loop over the GLL points
+      enddo
+    enddo
+
+    enddo ! end of loop over all spectral elements
+
+
+  end subroutine compute_forces_poro_solid_part
+

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_poroelastic.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_poroelastic.f90	2013-01-16 17:48:05 UTC (rev 21238)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_poroelastic.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -53,7 +53,7 @@
     if( .NOT. GPU_MODE ) then
 ! solid phase
 
-    call compute_forces_solid( iphase, &
+    call compute_forces_poro_solid_part( iphase, &
                         NSPEC_AB,NGLOB_AB,displs_poroelastic,accels_poroelastic,&
                         displw_poroelastic,velocw_poroelastic,&
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
@@ -70,7 +70,7 @@
 
 ! fluid phase
 
-    call compute_forces_fluid( iphase, &
+    call compute_forces_poro_fluid_part( iphase, &
                         NSPEC_AB,NGLOB_AB,displw_poroelastic,accelw_poroelastic,&
                         velocw_poroelastic,displs_poroelastic,&
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
@@ -89,7 +89,7 @@
     if( SIMULATION_TYPE == 3 ) then
 ! solid phase
 
-    call compute_forces_solid( iphase, &
+    call compute_forces_poro_solid_part( iphase, &
                         NSPEC_AB,NGLOB_AB,b_displs_poroelastic,b_accels_poroelastic,&
                         b_displw_poroelastic,b_velocw_poroelastic,&
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
@@ -106,7 +106,7 @@
 
 ! fluid phase
 
-    call compute_forces_fluid( iphase, &
+    call compute_forces_poro_fluid_part( iphase, &
                         NSPEC_AB,NGLOB_AB,b_displw_poroelastic,b_accelw_poroelastic,&
                         b_velocw_poroelastic,b_displs_poroelastic,&
                         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &

Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_solid.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_solid.f90	2013-01-16 17:48:05 UTC (rev 21238)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_solid.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -1,560 +0,0 @@
-!=====================================================================
-!
-!               S p e c f e m 3 D  V e r s i o n  2 . 1
-!               ---------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!    Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-!                             July 2012
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-
-  subroutine compute_forces_solid( iphase, &
-                        NSPEC_AB,NGLOB_AB,displs_poroelastic,accels_poroelastic,&
-                        displw_poroelastic,velocw_poroelastic,&
-                        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
-                        hprime_xx,hprime_yy,hprime_zz,&
-                        hprimewgll_xx,hprimewgll_yy,hprimewgll_zz,&
-                        wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wxgll,wygll,wzgll,  &
-                        kappaarraystore,rhoarraystore,mustore,etastore,permstore, &
-                        phistore,tortstore,jacobian,ibool,&
-                        epsilonsdev_xx,epsilonsdev_yy,epsilonsdev_xy,&
-                        epsilonsdev_xz,epsilonsdev_yz,epsilons_trace_over_3, &
-                        SIMULATION_TYPE,NSPEC_ADJOINT, &
-                        num_phase_ispec_poroelastic,nspec_inner_poroelastic,nspec_outer_poroelastic,&
-                        phase_ispec_inner_poroelastic )
-
-! compute forces for the solid poroelastic part
-
-  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
-                      N_SLS, &
-                      ONE_THIRD,FOUR_THIRDS
-
-  implicit none
-
-  integer :: iphase
-  integer :: NSPEC_AB,NGLOB_AB
-
-! adjoint simulations
-  integer :: SIMULATION_TYPE
-  !integer :: NSPEC_BOUN
-  integer :: NSPEC_ADJOINT
-! adjoint wavefields
-!  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: &
-!    mufr_kl, B_kl
-
-! displacement and acceleration
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displs_poroelastic,accels_poroelastic
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displw_poroelastic,velocw_poroelastic
-
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: &
-       epsilonsdev_xx,epsilonsdev_yy,epsilonsdev_xy,epsilonsdev_xz,epsilonsdev_yz
-  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilons_trace_over_3
-
-! arrays with mesh parameters per slice
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
-        mustore,etastore,phistore,tortstore,jacobian
-  real(kind=CUSTOM_REAL), dimension(2,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: rhoarraystore
-  real(kind=CUSTOM_REAL), dimension(3,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: kappaarraystore
-  real(kind=CUSTOM_REAL), dimension(6,NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: permstore
-
-! Gauss-Lobatto-Legendre points of integration and weights
-  double precision, dimension(NGLLX) :: wxgll
-  double precision, dimension(NGLLY) :: wygll
-  double precision, dimension(NGLLZ) :: wzgll
-
-! array with derivatives of Lagrange polynomials and precalculated products
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
-  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
-  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
-!  logical,dimension(NSPEC_AB) :: ispec_is_elastic
-  integer :: num_phase_ispec_poroelastic,nspec_inner_poroelastic,nspec_outer_poroelastic
-  integer, dimension(num_phase_ispec_poroelastic,2) :: phase_ispec_inner_poroelastic
-
-! local parameters
-
-  integer :: ispec,i,j,k,l,iglob,num_elements,ispec_p
-
-! spatial derivatives
-  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
-    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
-    tempx1p,tempx2p,tempx3p,tempy1p,tempy2p,tempy3p,tempz1p,tempz2p,tempz3p
-!    b_tempx1,b_tempx2,b_tempx3,b_tempy1,b_tempy2,b_tempy3,b_tempz1,b_tempz2,b_tempz3, &
-!    b_tempx1p,b_tempx2p,b_tempx3p,b_tempy1p,b_tempy2p,b_tempy3p,b_tempz1p,b_tempz2p,b_tempz3p
-
-  real(kind=CUSTOM_REAL) :: duxdxl,duydxl,duzdxl,duxdyl,duydyl,duzdyl,duxdzl,duydzl,duzdzl
-  real(kind=CUSTOM_REAL) :: dwxdxl,dwydxl,dwzdxl,dwxdyl,dwydyl,dwzdyl,dwxdzl,dwydzl,dwzdzl
-
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
-  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl_plus_duzdzl,dwxdxl_plus_dwydyl_plus_dwzdzl
-
-  real(kind=CUSTOM_REAL) hp1,hp2,hp3
-  real(kind=CUSTOM_REAL) fac1,fac2,fac3
-
-  real(kind=CUSTOM_REAL) tempx1ls,tempx2ls,tempx3ls,tempx1lw,tempx2lw,tempx3lw
-  real(kind=CUSTOM_REAL) tempy1ls,tempy2ls,tempy3ls,tempy1lw,tempy2lw,tempy3lw
-  real(kind=CUSTOM_REAL) tempz1ls,tempz2ls,tempz3ls,tempz1lw,tempz2lw,tempz3lw
-
-!  real(kind=CUSTOM_REAL) :: b_dux_dxl,b_duz_dxl,b_dux_dzl,b_duz_dzl
-!  real(kind=CUSTOM_REAL) :: dsxx,dsxy,dsxz,dsyy,dsyz,dszz
-!  real(kind=CUSTOM_REAL) :: b_dsxx,b_dsxy,b_dsxz,b_dsyy,b_dsyz,b_dszz
-!  real(kind=CUSTOM_REAL) :: b_dwx_dxl,b_dwz_dxl,b_dwx_dzl,b_dwz_dzl
-!  real(kind=CUSTOM_REAL) :: dwxx,dwxy,dwxz,dwyy,dwyz,dwzz
-!  real(kind=CUSTOM_REAL) :: b_dwxx,b_dwxy,b_dwxz,b_dwyy,b_dwyz,b_dwzz
-  real(kind=CUSTOM_REAL) :: sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,sigma_yx,sigma_zx,sigma_zy
-  real(kind=CUSTOM_REAL) :: sigmap
-!  real(kind=CUSTOM_REAL) :: b_sigma_xx,b_sigma_yy,b_sigma_zz,b_sigma_xy,b_sigma_xz,b_sigma_yz
-!  real(kind=CUSTOM_REAL) :: b_sigmap
-!  real(kind=CUSTOM_REAL) :: nx,nz,vx,vz,vn,vxf,vzf,vnf,rho_vpI,rho_vpII,rho_vs,tx,tz,weight,xxi,zxi,xgamma,zgamma,jacobian1D
-
-! viscous attenuation (poroelastic media)
-  real(kind=CUSTOM_REAL), dimension(6) :: bl_relaxed
-
-
-! Jacobian matrix and determinant
-  real(kind=CUSTOM_REAL) ::  xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
-
-! material properties of the poroelastic medium
-!  real(kind=CUSTOM_REAL) :: mul_unrelaxed,lambdal_unrelaxed,lambdalplus2mul_unrelaxed
-  real(kind=CUSTOM_REAL) :: kappal_s,rhol_s
-  real(kind=CUSTOM_REAL) :: etal_f,kappal_f,rhol_f
-  real(kind=CUSTOM_REAL) :: mul_fr,kappal_fr,phil,tortl,viscodampx,viscodampy,viscodampz
-  real(kind=CUSTOM_REAL) :: permlxx,permlxy,permlxz,permlyz,permlyy,permlzz,&
-                            invpermlxx,invpermlxy,invpermlxz,invpermlyz,invpermlyy,invpermlzz,detk
-  real(kind=CUSTOM_REAL) :: D_biot,H_biot,C_biot,M_biot,rhol_bar
-
-  real(kind=CUSTOM_REAL) :: mul_G,lambdal_G,lambdalplus2mul_G
-!  real(kind=CUSTOM_REAL) :: cpIsquare,cpIIsquare,cssquare,cpIl,cpIIl,csl
-
-! for attenuation
-!  real(kind=CUSTOM_REAL) :: Un,Unp1,tauinv,Sn,Snp1,theta_n,theta_np1,tauinvsquare,tauinvcube,tauinvUn
-
-
-! compute Grad(displs_poroelastic) at time step n for attenuation
-!  if(TURN_ATTENUATION_ON) call compute_gradient_attenuation(displs_poroelastic,dux_dxl_n,duz_dxl_n, &
-!      dux_dzl_n,duz_dzl_n,xix,xiz,gammax,gammaz,ibool,poroelastic,hprime_xx,hprime_zz,nspec,npoin)
-
-
-  if( iphase == 1 ) then
-    num_elements = nspec_outer_poroelastic
-  else
-    num_elements = nspec_inner_poroelastic
-  endif
-
-! loop over spectral elements
-  do ispec_p = 1,num_elements
-
-        ispec = phase_ispec_inner_poroelastic(ispec_p,iphase)
-
-! first double loop over GLL points to compute and store gradients
-    do k=1,NGLLZ
-      do j = 1,NGLLY
-        do i = 1,NGLLX
-
-! get poroelastic parameters of current local GLL
-    phil = phistore(i,j,k,ispec)
-    tortl = tortstore(i,j,k,ispec)
-!solid properties
-    kappal_s = kappaarraystore(1,i,j,k,ispec)
-    rhol_s = rhoarraystore(1,i,j,k,ispec)
-!fluid properties
-    kappal_f = kappaarraystore(2,i,j,k,ispec)
-    rhol_f = rhoarraystore(2,i,j,k,ispec)
-!frame properties
-    mul_fr = mustore(i,j,k,ispec)
-    kappal_fr = kappaarraystore(3,i,j,k,ispec)
-    rhol_bar =  (1._CUSTOM_REAL - phil)*rhol_s + phil*rhol_f
-!Biot coefficients for the input phi
-      D_biot = kappal_s*(1._CUSTOM_REAL + phil*(kappal_s/kappal_f - 1._CUSTOM_REAL))
-      H_biot = (kappal_s - kappal_fr)*(kappal_s - kappal_fr)/(D_biot - kappal_fr) + &
-                kappal_fr + 4._CUSTOM_REAL*mul_fr/3._CUSTOM_REAL
-      C_biot = kappal_s*(kappal_s - kappal_fr)/(D_biot - kappal_fr)
-      M_biot = kappal_s*kappal_s/(D_biot - kappal_fr)
-!The RHS has the form : div T -phi/c div T_f + phi/ceta_fk^-1.partial t w
-!where T = G:grad u_s + C_biot div w I
-!and T_f = C_biot div u_s I + M_biot div w I
-      mul_G = mul_fr
-      lambdal_G = H_biot - 2._CUSTOM_REAL*mul_fr
-      lambdalplus2mul_G = lambdal_G + 2._CUSTOM_REAL*mul_G
-
-! derivative along x,y,z for u_s and w
-              tempx1ls = 0.
-              tempx2ls = 0.
-              tempx3ls = 0.
-
-              tempy1ls = 0.
-              tempy2ls = 0.
-              tempy3ls = 0.
-
-              tempz1ls = 0.
-              tempz2ls = 0.
-              tempz3ls = 0.
-
-              tempx1lw = 0.
-              tempx2lw = 0.
-              tempx3lw = 0.
-
-              tempy1lw = 0.
-              tempy2lw = 0.
-              tempy3lw = 0.
-
-              tempz1lw = 0.
-              tempz2lw = 0.
-              tempz3lw = 0.
-
-! first double loop over GLL points to compute and store gradients
-          do l = 1,NGLLX
-                hp1 = hprime_xx(i,l)
-                iglob = ibool(l,j,k,ispec)
-                tempx1ls = tempx1ls + displs_poroelastic(1,iglob)*hp1
-                tempy1ls = tempy1ls + displs_poroelastic(2,iglob)*hp1
-                tempz1ls = tempz1ls + displs_poroelastic(3,iglob)*hp1
-                tempx1lw = tempx1lw + displw_poroelastic(1,iglob)*hp1
-                tempy1lw = tempy1lw + displw_poroelastic(2,iglob)*hp1
-                tempz1lw = tempz1lw + displw_poroelastic(3,iglob)*hp1
-    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
-
-    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
-                hp2 = hprime_yy(j,l)
-                iglob = ibool(i,l,k,ispec)
-                tempx2ls = tempx2ls + displs_poroelastic(1,iglob)*hp2
-                tempy2ls = tempy2ls + displs_poroelastic(2,iglob)*hp2
-                tempz2ls = tempz2ls + displs_poroelastic(3,iglob)*hp2
-                tempx2lw = tempx2lw + displw_poroelastic(1,iglob)*hp2
-                tempy2lw = tempy2lw + displw_poroelastic(2,iglob)*hp2
-                tempz2lw = tempz2lw + displw_poroelastic(3,iglob)*hp2
-    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
-
-    !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
-                hp3 = hprime_zz(k,l)
-                iglob = ibool(i,j,l,ispec)
-                tempx3ls = tempx3ls + displs_poroelastic(1,iglob)*hp3
-                tempy3ls = tempy3ls + displs_poroelastic(2,iglob)*hp3
-                tempz3ls = tempz3ls + displs_poroelastic(3,iglob)*hp3
-                tempx3lw = tempx3lw + displw_poroelastic(1,iglob)*hp3
-                tempy3lw = tempy3lw + displw_poroelastic(2,iglob)*hp3
-                tempz3lw = tempz3lw + displw_poroelastic(3,iglob)*hp3
-          enddo
-
-              xixl = xix(i,j,k,ispec)
-              xiyl = xiy(i,j,k,ispec)
-              xizl = xiz(i,j,k,ispec)
-              etaxl = etax(i,j,k,ispec)
-              etayl = etay(i,j,k,ispec)
-              etazl = etaz(i,j,k,ispec)
-              gammaxl = gammax(i,j,k,ispec)
-              gammayl = gammay(i,j,k,ispec)
-              gammazl = gammaz(i,j,k,ispec)
-              jacobianl = jacobian(i,j,k,ispec)
-
-! derivatives of displacement
-              duxdxl = xixl*tempx1ls + etaxl*tempx2ls + gammaxl*tempx3ls
-              duxdyl = xiyl*tempx1ls + etayl*tempx2ls + gammayl*tempx3ls
-              duxdzl = xizl*tempx1ls + etazl*tempx2ls + gammazl*tempx3ls
-
-              duydxl = xixl*tempy1ls + etaxl*tempy2ls + gammaxl*tempy3ls
-              duydyl = xiyl*tempy1ls + etayl*tempy2ls + gammayl*tempy3ls
-              duydzl = xizl*tempy1ls + etazl*tempy2ls + gammazl*tempy3ls
-
-              duzdxl = xixl*tempz1ls + etaxl*tempz2ls + gammaxl*tempz3ls
-              duzdyl = xiyl*tempz1ls + etayl*tempz2ls + gammayl*tempz3ls
-              duzdzl = xizl*tempz1ls + etazl*tempz2ls + gammazl*tempz3ls
-
-              dwxdxl = xixl*tempx1lw + etaxl*tempx2lw + gammaxl*tempx3lw
-              dwxdyl = xiyl*tempx1lw + etayl*tempx2lw + gammayl*tempx3lw
-              dwxdzl = xizl*tempx1lw + etazl*tempx2lw + gammazl*tempx3lw
-
-              dwydxl = xixl*tempy1lw + etaxl*tempy2lw + gammaxl*tempy3lw
-              dwydyl = xiyl*tempy1lw + etayl*tempy2lw + gammayl*tempy3lw
-              dwydzl = xizl*tempy1lw + etazl*tempy2lw + gammazl*tempy3lw
-
-              dwzdxl = xixl*tempz1lw + etaxl*tempz2lw + gammaxl*tempz3lw
-              dwzdyl = xiyl*tempz1lw + etayl*tempz2lw + gammayl*tempz3lw
-              dwzdzl = xizl*tempz1lw + etazl*tempz2lw + gammazl*tempz3lw
-
-    ! precompute some sums to save CPU time
-              duxdxl_plus_duydyl_plus_duzdzl = duxdxl + duydyl + duzdzl
-              dwxdxl_plus_dwydyl_plus_dwzdzl = dwxdxl + dwydyl + dwzdzl
-              duxdxl_plus_duydyl = duxdxl + duydyl
-              duxdxl_plus_duzdzl = duxdxl + duzdzl
-              duydyl_plus_duzdzl = duydyl + duzdzl
-              duxdyl_plus_duydxl = duxdyl + duydxl
-              duzdxl_plus_duxdzl = duzdxl + duxdzl
-              duzdyl_plus_duydzl = duzdyl + duydzl
-
-! compute stress tensor (include attenuation or anisotropy if needed)
-
-!  if(VISCOATTENUATION) then
-!chris:check
-
-! Dissipation only controlled by frame share attenuation in poroelastic (see Morency & Tromp, GJI 2008).
-! attenuation is implemented following the memory variable formulation of
-! J. M. Carcione, Seismic modeling in viscoelastic media, Geophysics,
-! vol. 58(1), p. 110-120 (1993). More details can be found in
-! J. M. Carcione, D. Kosloff and R. Kosloff, Wave propagation simulation in a linear
-! viscoelastic medium, Geophysical Journal International, vol. 95, p. 597-611 (1988).
-
-!  else
-
-! no attenuation
-    sigma_xx = lambdalplus2mul_G*duxdxl + lambdal_G*duydyl_plus_duzdzl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
-    sigma_yy = lambdalplus2mul_G*duydyl + lambdal_G*duxdxl_plus_duzdzl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
-    sigma_zz = lambdalplus2mul_G*duzdzl + lambdal_G*duxdxl_plus_duydyl + C_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
-
-    sigma_xy = mul_G*duxdyl_plus_duydxl
-    sigma_xz = mul_G*duzdxl_plus_duxdzl
-    sigma_yz = mul_G*duzdyl_plus_duydzl
-
-    sigmap = C_biot*duxdxl_plus_duydyl_plus_duzdzl + M_biot*dwxdxl_plus_dwydyl_plus_dwzdzl
-
-!  endif !if(ATTENUATION)
-
-          if(SIMULATION_TYPE == 3) then ! kernels calculation
-    epsilons_trace_over_3(i,j,k,ispec) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-    epsilonsdev_xx(i,j,k,ispec) = duxdxl - ONE_THIRD * (duxdxl + duydyl + duzdzl)
-    epsilonsdev_yy(i,j,k,ispec) = duydyl - ONE_THIRD * (duxdxl + duydyl + duzdzl)
-    epsilonsdev_xy(i,j,k,ispec) = 0.5 * duxdyl_plus_duydxl
-    epsilonsdev_xz(i,j,k,ispec) = 0.5 * duzdxl_plus_duxdzl
-    epsilonsdev_yz(i,j,k,ispec) = 0.5 * duzdyl_plus_duydzl
-          endif
-
-
-! weak formulation term based on stress tensor (non-symmetric form)
-            ! define symmetric components of sigma
-            sigma_yx = sigma_xy
-            sigma_zx = sigma_xz
-            sigma_zy = sigma_yz
-
-            ! form dot product with test vector, non-symmetric form (which is useful in the case of PML)
-            tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
-            tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
-            tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
-
-            tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
-            tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
-            tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
-
-            tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
-            tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
-            tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
-
-          tempx1p(i,j,k) = jacobianl * sigmap*xixl
-          tempy1p(i,j,k) = jacobianl * sigmap*xiyl
-          tempz1p(i,j,k) = jacobianl * sigmap*xizl
-
-          tempx2p(i,j,k) = jacobianl * sigmap*etaxl
-          tempy2p(i,j,k) = jacobianl * sigmap*etayl
-          tempz2p(i,j,k) = jacobianl * sigmap*etazl
-
-          tempx3p(i,j,k) = jacobianl * sigmap*gammaxl
-          tempy3p(i,j,k) = jacobianl * sigmap*gammayl
-          tempz3p(i,j,k) = jacobianl * sigmap*gammazl
-
-
-        enddo
-      enddo
-    enddo
-
-!
-! second double-loop over GLL to compute all the terms
-!
-    do k = 1,NGLLZ
-      do j = 1,NGLLY
-        do i = 1,NGLLX
-
-              tempx1ls = 0.
-              tempy1ls = 0.
-              tempz1ls = 0.
-
-              tempx2ls = 0.
-              tempy2ls = 0.
-              tempz2ls = 0.
-
-              tempx3ls = 0.
-              tempy3ls = 0.
-              tempz3ls = 0.
-
-              tempx1lw = 0.
-              tempy1lw = 0.
-              tempz1lw = 0.
-
-              tempx2lw = 0.
-              tempy2lw = 0.
-              tempz2lw = 0.
-
-              tempx3lw = 0.
-              tempy3lw = 0.
-              tempz3lw = 0.
-
-              do l=1,NGLLX
-                fac1 = hprimewgll_xx(l,i)
-                tempx1ls = tempx1ls + tempx1(l,j,k)*fac1
-                tempy1ls = tempy1ls + tempy1(l,j,k)*fac1
-                tempz1ls = tempz1ls + tempz1(l,j,k)*fac1
-                tempx1lw = tempx1lw + tempx1p(l,j,k)*fac1
-                tempy1lw = tempy1lw + tempy1p(l,j,k)*fac1
-                tempz1lw = tempz1lw + tempz1p(l,j,k)*fac1
-                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
-
-                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
-                fac2 = hprimewgll_yy(l,j)
-                tempx2ls = tempx2ls + tempx2(i,l,k)*fac2
-                tempy2ls = tempy2ls + tempy2(i,l,k)*fac2
-                tempz2ls = tempz2ls + tempz2(i,l,k)*fac2
-                tempx2lw = tempx2lw + tempx2p(i,l,k)*fac2
-                tempy2lw = tempy2lw + tempy2p(i,l,k)*fac2
-                tempz2lw = tempz2lw + tempz2p(i,l,k)*fac2
-                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
-
-                !!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
-                fac3 = hprimewgll_zz(l,k)
-                tempx3ls = tempx3ls + tempx3(i,j,l)*fac3
-                tempy3ls = tempy3ls + tempy3(i,j,l)*fac3
-                tempz3ls = tempz3ls + tempz3(i,j,l)*fac3
-                tempx3lw = tempx3lw + tempx3p(i,j,l)*fac3
-                tempy3lw = tempy3lw + tempy3p(i,j,l)*fac3
-                tempz3lw = tempz3lw + tempz3p(i,j,l)*fac3
-              enddo
-
-              fac1 = wgllwgll_yz(j,k)
-              fac2 = wgllwgll_xz(i,k)
-              fac3 = wgllwgll_xy(i,j)
-
-              phil = phistore(i,j,k,ispec)
-              tortl = tortstore(i,j,k,ispec)
-
-    ! sum contributions from each element to the global mesh
-
-              iglob = ibool(i,j,k,ispec)
-
-
-    accels_poroelastic(1,iglob) = accels_poroelastic(1,iglob) - ( fac1*(tempx1ls - phil/tortl*tempx1lw) &
-           + fac2*(tempx2ls - phil/tortl*tempx2lw) + fac3*(tempx3ls - phil/tortl*tempx3lw) )
-
-    accels_poroelastic(2,iglob) = accels_poroelastic(2,iglob) - ( fac1*(tempy1ls - phil/tortl*tempy1lw) &
-           + fac2*(tempy2ls - phil/tortl*tempy2lw) + fac3*(tempy3ls - phil/tortl*tempy3lw) )
-
-    accels_poroelastic(3,iglob) = accels_poroelastic(3,iglob) - ( fac1*(tempz1ls - phil/tortl*tempz1lw) &
-           + fac2*(tempz2ls - phil/tortl*tempz2lw) + fac3*(tempz3ls - phil/tortl*tempz3lw) )
-
-!
-!---- viscous damping
-!
-! add + phi/tort eta_f k^-1 dot(w)
-
-    etal_f = etastore(i,j,k,ispec)
-
-      if(etal_f >0.d0) then
-
-    permlxx = permstore(1,i,j,k,ispec)
-    permlxy = permstore(2,i,j,k,ispec)
-    permlxz = permstore(3,i,j,k,ispec)
-    permlyy = permstore(4,i,j,k,ispec)
-    permlyz = permstore(5,i,j,k,ispec)
-    permlzz = permstore(6,i,j,k,ispec)
-
-! calcul of the inverse of k
-    detk = permlxz*(permlxy*permlyz-permlxz*permlyy) &
-         - permlxy*(permlxy*permlzz-permlyz*permlxz) &
-         + permlxx*(permlyy*permlzz-permlyz*permlyz)
-
-    if(detk /= 0.d0) then
-     invpermlxx = (permlyy*permlzz-permlyz*permlyz)/detk
-     invpermlxy = (permlxz*permlyz-permlxy*permlzz)/detk
-     invpermlxz = (permlxy*permlyz-permlxz*permlyy)/detk
-     invpermlyy = (permlxx*permlzz-permlxz*permlxz)/detk
-     invpermlyz = (permlxy*permlxz-permlxx*permlyz)/detk
-     invpermlzz = (permlxx*permlyy-permlxy*permlxy)/detk
-    else
-      stop 'Permeability matrix is not inversible'
-    endif
-
-! relaxed viscous coef
-          bl_relaxed(1) = etal_f*invpermlxx
-          bl_relaxed(2) = etal_f*invpermlxy
-          bl_relaxed(3) = etal_f*invpermlxz
-          bl_relaxed(4) = etal_f*invpermlyy
-          bl_relaxed(5) = etal_f*invpermlyz
-          bl_relaxed(6) = etal_f*invpermlzz
-
-!    if(VISCOATTENUATION) then
-!          bl_unrelaxed(1) = etal_f*invpermlxx*theta_e/theta_s
-!          bl_unrelaxed(2) = etal_f*invpermlxz*theta_e/theta_s
-!          bl_unrelaxed(3) = etal_f*invpermlzz*theta_e/theta_s
-!    endif
-
-!     do k = 1,NGLLZ
-!      do j = 1,NGLLY
-!        do i = 1,NGLLX
-
-!              iglob = ibool(i,j,k,ispec)
-
-!     if(VISCOATTENUATION) then
-! compute the viscous damping term with the unrelaxed viscous coef and add memory variable
-!      viscodampx = velocw_poroelastic(1,iglob)*bl_unrelaxed(1) + velocw_poroelastic(2,iglob)*bl_unrelaxed(2)&
-!                  - rx_viscous(i,j,ispec)
-!      viscodampz = velocw_poroelastic(1,iglob)*bl_unrelaxed(2) + velocw_poroelastic(2,iglob)*bl_unrelaxed(3)&
-!                  - rz_viscous(i,j,ispec)
-!     else
-
-! no viscous attenuation
-      viscodampx = velocw_poroelastic(1,iglob)*bl_relaxed(1) + velocw_poroelastic(2,iglob)*bl_relaxed(2) + &
-                   velocw_poroelastic(3,iglob)*bl_relaxed(3)
-      viscodampy = velocw_poroelastic(1,iglob)*bl_relaxed(2) + velocw_poroelastic(2,iglob)*bl_relaxed(4) + &
-                   velocw_poroelastic(3,iglob)*bl_relaxed(5)
-      viscodampz = velocw_poroelastic(1,iglob)*bl_relaxed(3) + velocw_poroelastic(2,iglob)*bl_relaxed(5) + &
-                   velocw_poroelastic(3,iglob)*bl_relaxed(6)
-!     endif
-
-     accels_poroelastic(1,iglob) = accels_poroelastic(1,iglob) + phil/tortl*wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
-              viscodampx
-     accels_poroelastic(2,iglob) = accels_poroelastic(2,iglob) + phil/tortl*wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
-              viscodampy
-     accels_poroelastic(3,iglob) = accels_poroelastic(3,iglob) + phil/tortl*wxgll(i)*wygll(j)*wzgll(k)*jacobian(i,j,k,ispec)*&
-              viscodampz
-
-! if isolver == 1 .and. save_forward then b_viscodamp is save in compute_forces_fluid.f90
-!          if(isolver == 2) then ! kernels calculation
-!        b_accels_poroelastic(1,iglob) = b_accels_poroelastic(1,iglob) + phil/tortl*b_viscodampx(iglob)
-!        b_accels_poroelastic(2,iglob) = b_accels_poroelastic(2,iglob) + phil/tortl*b_viscodampz(iglob)
-!          endif
-
-!        enddo
-!      enddo
-!     enddo
-
-         endif ! if(etal_f >0.d0) then
-
-        enddo ! second loop over the GLL points
-      enddo
-    enddo
-
-    enddo ! end of loop over all spectral elements
-
-
-  end subroutine compute_forces_solid
-

Copied: seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_compute_forces_elastic_Dev_openmp.f90 (from rev 21237, seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev_openmp.f90)
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_compute_forces_elastic_Dev_openmp.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_compute_forces_elastic_Dev_openmp.f90	2013-01-16 18:04:01 UTC (rev 21239)
@@ -0,0 +1,984 @@
+!=====================================================================
+!
+!               S p e c f e m 3 D  V e r s i o n  2 . 1
+!               ---------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!    Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+!                            October 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! OpenMP Threaded variant by John Levesque, Max Rietmann and Olaf Schenk
+
+!! DK DK Jan 2013: beware, that OpenMP version is not maintained / supported and thus probably does not work
+
+  subroutine compute_forces_elastic_Dev_openmp(iphase ,NSPEC_AB,NGLOB_AB, &
+                             displ,veloc,accel, &
+                             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+                             hprime_xx,hprime_xxT, &
+                             hprimewgll_xx,hprimewgll_xxT, &
+                             wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+                             kappastore,mustore,jacobian,ibool, &
+                             ATTENUATION,deltat, &
+                             one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
+                             NSPEC_ATTENUATION_AB, &
+                             R_xx,R_yy,R_xy,R_xz,R_yz, &
+                             epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                             epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+                             ANISOTROPY,NSPEC_ANISO, &
+                             c11store,c12store,c13store,c14store,c15store,c16store,&
+                             c22store,c23store,c24store,c25store,c26store,c33store,&
+                             c34store,c35store,c36store,c44store,c45store,c46store,&
+                             c55store,c56store,c66store, &
+                             SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+                             NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
+                             is_moho_top,is_moho_bot, &
+                             dsdx_top,dsdx_bot, &
+                             ispec2D_moho_top,ispec2D_moho_bot, &
+                             num_phase_ispec_elastic,&
+                             phase_ispec_inner_elastic,&
+                             num_colors_outer_elastic,num_colors_inner_elastic)
+
+
+
+  ! computes elastic tensor term
+
+  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
+       N_SLS,SAVE_MOHO_MESH, &
+       ONE_THIRD,FOUR_THIRDS,m1,m2
+
+  ! Trying to pass these variables as subroutine arguments ran into
+  ! problems, so we reference them from their module, making them
+  ! accessible from this subroutine
+  use specfem_par_elastic, only:dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3, &
+       newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3, &
+       tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3,num_elem_colors_elastic, &
+       dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,tempx1_att,tempx2_att,tempx3_att, &
+       tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
+
+  use fault_solver_dynamic, only : Kelvin_Voigt_eta
+
+  implicit none
+
+  integer :: NSPEC_AB,NGLOB_AB
+
+  ! displacement and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,accel
+
+
+  ! arrays with mesh parameters per slice
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+       xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+       kappastore,mustore,jacobian
+
+  ! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+  ! memory variables and standard linear solids for attenuation
+  logical :: ATTENUATION
+  logical :: COMPUTE_AND_STORE_STRAIN
+  integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
+  integer :: NSPEC_ATTENUATION_AB
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
+  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
+       R_xx,R_yy,R_xy,R_xz,R_yz
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
+       epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
+
+  ! anisotropy
+  logical :: ANISOTROPY
+  integer :: NSPEC_ANISO
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
+       c11store,c12store,c13store,c14store,c15store,c16store, &
+       c22store,c23store,c24store,c25store,c26store,c33store, &
+       c34store,c35store,c36store,c44store,c45store,c46store, &
+       c55store,c56store,c66store
+
+  integer :: iphase
+  integer :: num_phase_ispec_elastic
+  integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
+
+  ! adjoint simulations
+  integer :: SIMULATION_TYPE
+  integer :: NSPEC_BOUN,NSPEC2D_MOHO
+
+  ! moho kernel
+  real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
+       dsdx_top,dsdx_bot
+  logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
+  integer :: ispec2D_moho_top, ispec2D_moho_bot
+
+  ! local attenuation parameters
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_xx_loc, &
+       epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
+  real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
+  real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
+  real(kind=CUSTOM_REAL) Sn,Snp1
+  real(kind=CUSTOM_REAL) templ
+
+  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
+
+  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+
+  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
+
+  real(kind=CUSTOM_REAL) fac1,fac2,fac3
+
+  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
+  real(kind=CUSTOM_REAL) kappal
+
+  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
+  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att;
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att;
+
+  integer OMP_get_thread_num
+  integer OMP_GET_MAX_THREADS
+
+  ! timing
+  !double precision omp_get_wtime
+  !double precision start_time
+  !double precision end_time
+  !double precision accumulate_time_start
+  !double precision accumulate_time_stop
+
+  ! local anisotropy parameters
+  real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
+       c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
+
+  integer i_SLS,imodulo_N_SLS
+  integer ispec,iglob,ispec_p,num_elements
+  integer i,j,k
+  integer thread_id
+  integer NUM_THREADS
+  !integer omp_get_num_threads ! function
+
+  ! coloring additions
+  ! integer, dimension(:), allocatable :: num_elem_colors_elastic
+  integer istart, estart, number_of_colors
+  integer num_colors_outer_elastic, num_colors_inner_elastic
+  integer icolor
+
+  real(kind=CUSTOM_REAL) :: eta
+
+  ! write(*,*) "num_elem_colors_elastic(1) = ",num_elem_colors_elastic(1)
+  imodulo_N_SLS = mod(N_SLS,3)
+
+  ! NUM_THREADS = 1
+  NUM_THREADS = OMP_GET_MAX_THREADS()
+
+
+  ! choses inner/outer elements
+  if( iphase == 1 ) then
+     number_of_colors = num_colors_outer_elastic
+     istart = 1
+  else
+     number_of_colors = num_colors_inner_elastic + num_colors_outer_elastic
+     istart = num_colors_outer_elastic+1
+     ! istart = num_colors_outer_elastic
+  endif
+
+  ! "start" timer
+  ! start_time = omp_get_wtime()
+
+  ! The mesh coloring algorithm provides disjoint sets of elements that
+  ! do not share degrees of freedom which is required for the assembly
+  ! step at the "accel(iglob) += update" step. The coloring is
+  ! implemented, such that the element and node indices are ordered by
+  ! color. This requires then only to iterate through the elements in
+  ! order, stopping to synchronize threads after all the elements in a
+  ! color are finished.
+  estart = 1
+  do icolor = istart, number_of_colors
+
+    !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(&
+    !$OMP R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3,&
+    !$OMP factor_loc,alphaval_loc,betaval_loc,gammaval_loc,&
+    !$OMP Sn,Snp1,&
+    !$OMP templ,&
+    !$OMP xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl,&
+    !$OMP duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl,&
+    !$OMP duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl,&
+    !$OMP duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl,&
+    !$OMP sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,&
+    !$OMP fac1,fac2,fac3,&
+    !$OMP lambdal,mul,lambdalplus2mul,kappal,&
+    !$OMP c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
+    !$OMP c33,c34,c35,c36,c44,c45,c46,c55,c56,c66,&
+    !$OMP i_SLS,&
+    !$OMP ispec,iglob,ispec_p,&
+    !$OMP i,j,k,&
+    !$OMP thread_id)
+
+    thread_id = OMP_get_thread_num()+1
+
+    ! we retrive the subset of the total elements determined by the mesh
+    ! coloring. This number changes as we iterate through the colors
+    num_elements = num_elem_colors_elastic(icolor)
+    !$OMP DO
+    do ispec_p = estart,(estart-1)+num_elements
+
+
+       ! returns element id from stored element list
+       ispec = phase_ispec_inner_elastic(ispec_p,iphase)
+
+       ! adjoint simulations: moho kernel
+       if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
+          if (is_moho_top(ispec)) then
+             ispec2D_moho_top = ispec2D_moho_top + 1
+          else if (is_moho_bot(ispec)) then
+             ispec2D_moho_bot = ispec2D_moho_bot + 1
+          endif
+       endif ! adjoint
+
+       ! Kelvin Voigt damping: artificial viscosity around dynamic faults
+
+        ! stores displacment values in local array
+        if (allocated(Kelvin_Voigt_eta)) then
+          eta = Kelvin_Voigt_eta(ispec)   
+          do k=1,NGLLZ
+            do j=1,NGLLY
+              do i=1,NGLLX
+                iglob = ibool(i,j,k,ispec)
+                dummyx_loc(i,j,k,thread_id) = displ(1,iglob) + eta*veloc(1,iglob)
+                dummyy_loc(i,j,k,thread_id) = displ(2,iglob) + eta*veloc(2,iglob)
+                dummyz_loc(i,j,k,thread_id) = displ(3,iglob) + eta*veloc(3,iglob)
+              enddo
+            enddo
+          enddo
+
+        else
+          do k=1,NGLLZ
+            do j=1,NGLLY
+              do i=1,NGLLX
+                iglob = ibool(i,j,k,ispec)
+                dummyx_loc(i,j,k,thread_id) = displ(1,iglob)
+                dummyy_loc(i,j,k,thread_id) = displ(2,iglob)
+                dummyz_loc(i,j,k,thread_id) = displ(3,iglob)
+              enddo
+            enddo
+          enddo
+        endif
+
+       ! stores velocity values in local array
+       do k=1,NGLLZ
+          do j=1,NGLLY
+             do i=1,NGLLX
+                iglob = ibool(i,j,k,ispec)
+                dummyx_loc_att(i,j,k,thread_id) = veloc(1,iglob)
+                dummyy_loc_att(i,j,k,thread_id) = veloc(2,iglob)
+                dummyz_loc_att(i,j,k,thread_id) = veloc(3,iglob)
+             enddo
+          enddo
+       enddo
+
+       ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+       ! for incompressible fluid flow, Cambridge University Press (2002),
+       ! pages 386 and 389 and Figure 8.3.1
+       ! call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
+       do j=1,m2
+          do i=1,m1
+             tempx1(i,j,1,thread_id) = &
+                  hprime_xx(i,1)*dummyx_loc(1,j,1,thread_id) + &
+                  hprime_xx(i,2)*dummyx_loc(2,j,1,thread_id) + &
+                  hprime_xx(i,3)*dummyx_loc(3,j,1,thread_id) + &
+                  hprime_xx(i,4)*dummyx_loc(4,j,1,thread_id) + &
+                  hprime_xx(i,5)*dummyx_loc(5,j,1,thread_id)
+             tempy1(i,j,1,thread_id) = &
+                  hprime_xx(i,1)*dummyy_loc(1,j,1,thread_id) + &
+                  hprime_xx(i,2)*dummyy_loc(2,j,1,thread_id) + &
+                  hprime_xx(i,3)*dummyy_loc(3,j,1,thread_id) + &
+                  hprime_xx(i,4)*dummyy_loc(4,j,1,thread_id) + &
+                  hprime_xx(i,5)*dummyy_loc(5,j,1,thread_id)
+             tempz1(i,j,1,thread_id) = &
+                  hprime_xx(i,1)*dummyz_loc(1,j,1,thread_id) + &
+                  hprime_xx(i,2)*dummyz_loc(2,j,1,thread_id) + &
+                  hprime_xx(i,3)*dummyz_loc(3,j,1,thread_id) + &
+                  hprime_xx(i,4)*dummyz_loc(4,j,1,thread_id) + &
+                  hprime_xx(i,5)*dummyz_loc(5,j,1,thread_id)
+          enddo
+       enddo
+
+       !   call mxm_m1_m1_5points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
+       !          hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
+       do j=1,m1
+          do i=1,m1
+             ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+             do k = 1,NGLLX
+                tempx2(i,j,k,thread_id) = dummyx_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
+                     dummyx_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
+                     dummyx_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
+                     dummyx_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
+                     dummyx_loc(i,5,k,thread_id)*hprime_xxT(5,j)
+                tempy2(i,j,k,thread_id) = dummyy_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
+                     dummyy_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
+                     dummyy_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
+                     dummyy_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
+                     dummyy_loc(i,5,k,thread_id)*hprime_xxT(5,j)
+                tempz2(i,j,k,thread_id) = dummyz_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
+                     dummyz_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
+                     dummyz_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
+                     dummyz_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
+                     dummyz_loc(i,5,k,thread_id)*hprime_xxT(5,j)
+             enddo
+          enddo
+       enddo
+
+       ! call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
+       do j=1,m1
+          do i=1,m2
+             tempx3(i,1,j,thread_id) = &
+                  dummyx_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
+                  dummyx_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
+                  dummyx_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
+                  dummyx_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
+                  dummyx_loc(i,1,5,thread_id)*hprime_xxT(5,j)
+             tempy3(i,1,j,thread_id) = &
+                  dummyy_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
+                  dummyy_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
+                  dummyy_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
+                  dummyy_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
+                  dummyy_loc(i,1,5,thread_id)*hprime_xxT(5,j)
+             tempz3(i,1,j,thread_id) = &
+                  dummyz_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
+                  dummyz_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
+                  dummyz_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
+                  dummyz_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
+                  dummyz_loc(i,1,5,thread_id)*hprime_xxT(5,j)
+
+          enddo
+       enddo
+
+       if( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
+          do j=1,m2
+             do i=1,m1
+                tempx1_att(i,j,1,thread_id) = tempx1(i,j,1,thread_id)
+                tempy1_att(i,j,1,thread_id) = tempy1(i,j,1,thread_id)
+                tempz1_att(i,j,1,thread_id) = tempz1(i,j,1,thread_id)
+             enddo
+          enddo
+
+          do j=1,m1
+             do i=1,m1
+                do k=1,NGLLX
+                   tempx2_att(i,j,k,thread_id) = tempx2(i,j,k,thread_id)
+                   tempy2_att(i,j,k,thread_id) = tempy2(i,j,k,thread_id)
+                   tempz2_att(i,j,k,thread_id) = tempz2(i,j,k,thread_id)
+                enddo
+             enddo
+          enddo
+
+          do j=1,m1
+             do i=1,m2
+                tempx3_att(i,1,j,thread_id) = tempx3(i,1,j,thread_id)
+                tempy3_att(i,1,j,thread_id) = tempy3(i,1,j,thread_id)
+                tempz3_att(i,1,j,thread_id) = tempz3(i,1,j,thread_id)
+             enddo
+          enddo
+
+          ! use first order Taylor expansion of displacement for local storage of stresses 
+          ! at this current time step, to fix attenuation in a consistent way
+          do j=1,m2
+             do i=1,m1
+                tempx1l_att(i,j,1,thread_id) = tempx1l_att(i,j,1,thread_id) + &
+                     deltat*hprime_xx(i,1)*dummyx_loc_att(1,j,1,thread_id) + &
+                     deltat*hprime_xx(i,2)*dummyx_loc_att(2,j,1,thread_id) + &
+                     deltat*hprime_xx(i,3)*dummyx_loc_att(3,j,1,thread_id) + &
+                     deltat*hprime_xx(i,4)*dummyx_loc_att(4,j,1,thread_id) + &
+                     deltat*hprime_xx(i,5)*dummyx_loc_att(5,j,1,thread_id)
+
+                tempy1l_att(i,j,1,thread_id) = tempy1l_att(i,j,1,thread_id) + &
+                     deltat*hprime_xx(i,1)*dummyy_loc_att(1,j,1,thread_id) + &
+                     deltat*hprime_xx(i,2)*dummyy_loc_att(2,j,1,thread_id) + &
+                     deltat*hprime_xx(i,3)*dummyy_loc_att(3,j,1,thread_id) + &
+                     deltat*hprime_xx(i,4)*dummyy_loc_att(4,j,1,thread_id) + &
+                     deltat*hprime_xx(i,5)*dummyy_loc_att(5,j,1,thread_id)
+
+                tempz1l_att(i,j,1,thread_id) = tempz1l_att(i,j,1,thread_id) + &
+                     deltat*hprime_xx(i,1)*dummyz_loc_att(1,j,1,thread_id) + &
+                     deltat*hprime_xx(i,2)*dummyz_loc_att(2,j,1,thread_id) + &
+                     deltat*hprime_xx(i,3)*dummyz_loc_att(3,j,1,thread_id) + &
+                     deltat*hprime_xx(i,4)*dummyz_loc_att(4,j,1,thread_id) + &
+                     deltat*hprime_xx(i,5)*dummyz_loc_att(5,j,1,thread_id)
+             enddo
+          enddo
+
+          do j=1,m1
+             do i=1,m1
+                do k=1,NGLLX
+                   tempx2l_att(i,j,k,thread_id) = tempx2l_att(i,j,k,thread_id) + &
+                        deltat*dummyx_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
+                        deltat*dummyx_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
+                        deltat*dummyx_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
+                        deltat*dummyx_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
+                        deltat*dummyx_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
+
+                   tempy2l_att(i,j,k,thread_id) = tempy2l_att(i,j,k,thread_id) + &
+                        deltat*dummyy_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
+                        deltat*dummyy_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
+                        deltat*dummyy_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
+                        deltat*dummyy_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
+                        deltat*dummyy_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
+
+                   tempz2l_att(i,j,k,thread_id) = tempz2l_att(i,j,k,thread_id) + &
+                        deltat*dummyz_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
+                        deltat*dummyz_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
+                        deltat*dummyz_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
+                        deltat*dummyz_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
+                        deltat*dummyz_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
+                enddo
+             enddo
+          enddo
+
+          do j=1,m1
+             do i=1,m2
+                tempx3l_att(i,1,j,thread_id) = tempx3l_att(i,1,j,thread_id) + &
+                     deltat*dummyx_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
+                     deltat*dummyx_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
+                     deltat*dummyx_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
+                     deltat*dummyx_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
+                     deltat*dummyx_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
+
+                tempy3l_att(i,1,j,thread_id) = tempy3l_att(i,1,j,thread_id) + &
+                     deltat*dummyy_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
+                     deltat*dummyy_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
+                     deltat*dummyy_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
+                     deltat*dummyy_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
+                     deltat*dummyy_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
+
+                tempz3l_att(i,1,j,thread_id) = tempz3l_att(i,1,j,thread_id) + &
+                     deltat*dummyz_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
+                     deltat*dummyz_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
+                     deltat*dummyz_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
+                     deltat*dummyz_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
+                     deltat*dummyz_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
+             enddo
+          endif
+       endif
+
+       do k=1,NGLLZ
+          do j=1,NGLLY
+             do i=1,NGLLX
+                ! get derivatives of ux, uy and uz with respect to x, y and z
+                xixl = xix(i,j,k,ispec)
+                xiyl = xiy(i,j,k,ispec)
+                xizl = xiz(i,j,k,ispec)
+                etaxl = etax(i,j,k,ispec)
+                etayl = etay(i,j,k,ispec)
+                etazl = etaz(i,j,k,ispec)
+                gammaxl = gammax(i,j,k,ispec)
+                gammayl = gammay(i,j,k,ispec)
+                gammazl = gammaz(i,j,k,ispec)
+                jacobianl = jacobian(i,j,k,ispec)
+
+                duxdxl = xixl*tempx1(i,j,k,thread_id) + etaxl*tempx2(i,j,k,thread_id) + gammaxl*tempx3(i,j,k,thread_id)
+                duxdyl = xiyl*tempx1(i,j,k,thread_id) + etayl*tempx2(i,j,k,thread_id) + gammayl*tempx3(i,j,k,thread_id)
+                duxdzl = xizl*tempx1(i,j,k,thread_id) + etazl*tempx2(i,j,k,thread_id) + gammazl*tempx3(i,j,k,thread_id)
+
+                duydxl = xixl*tempy1(i,j,k,thread_id) + etaxl*tempy2(i,j,k,thread_id) + gammaxl*tempy3(i,j,k,thread_id)
+                duydyl = xiyl*tempy1(i,j,k,thread_id) + etayl*tempy2(i,j,k,thread_id) + gammayl*tempy3(i,j,k,thread_id)
+                duydzl = xizl*tempy1(i,j,k,thread_id) + etazl*tempy2(i,j,k,thread_id) + gammazl*tempy3(i,j,k,thread_id)
+
+                duzdxl = xixl*tempz1(i,j,k,thread_id) + etaxl*tempz2(i,j,k,thread_id) + gammaxl*tempz3(i,j,k,thread_id)
+                duzdyl = xiyl*tempz1(i,j,k,thread_id) + etayl*tempz2(i,j,k,thread_id) + gammayl*tempz3(i,j,k,thread_id)
+                duzdzl = xizl*tempz1(i,j,k,thread_id) + etazl*tempz2(i,j,k,thread_id) + gammazl*tempz3(i,j,k,thread_id)
+
+                ! save strain on the Moho boundary
+                if (SAVE_MOHO_MESH ) then
+                   if (is_moho_top(ispec)) then
+                      dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
+                      dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
+                      dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
+                      dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
+                      dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
+                      dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
+                      dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
+                      dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
+                      dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
+                   else if (is_moho_bot(ispec)) then
+                      dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
+                      dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
+                      dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
+                      dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
+                      dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
+                      dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
+                      dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
+                      dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
+                      dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
+                   endif
+                endif
+
+                ! precompute some sums to save CPU time
+                duxdxl_plus_duydyl = duxdxl + duydyl
+                duxdxl_plus_duzdzl = duxdxl + duzdzl
+                duydyl_plus_duzdzl = duydyl + duzdzl
+                duxdyl_plus_duydxl = duxdyl + duydxl
+                duzdxl_plus_duxdzl = duzdxl + duxdzl
+                duzdyl_plus_duydzl = duzdyl + duydzl
+                
+                if( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
+                   ! temporary variables used for fixing attenuation in a consistent way
+                   duxdxl_att = xixl*tempx1l_att(i,j,k,thread_id) + etaxl*tempx2l_att(i,j,k,thread_id) + &
+                        gammaxl*tempx3l_att(i,j,k,thread_id)
+                   duxdyl_att = xiyl*tempx1l_att(i,j,k,thread_id) + etayl*tempx2l_att(i,j,k,thread_id) + &
+                        gammayl*tempx3l_att(i,j,k,thread_id)
+                   duxdzl_att = xizl*tempx1l_att(i,j,k,thread_id) + etazl*tempx2l_att(i,j,k,thread_id) + &
+                        gammazl*tempx3l_att(i,j,k,thread_id)
+
+                   duydxl_att = xixl*tempy1l_att(i,j,k,thread_id) + etaxl*tempy2l_att(i,j,k,thread_id) + &
+                        gammaxl*tempy3l_att(i,j,k,thread_id)
+                   duydyl_att = xiyl*tempy1l_att(i,j,k,thread_id) + etayl*tempy2l_att(i,j,k,thread_id) + &
+                        gammayl*tempy3l_att(i,j,k,thread_id)
+                   duydzl_att = xizl*tempy1l_att(i,j,k,thread_id) + etazl*tempy2l_att(i,j,k,thread_id) + &
+                        gammazl*tempy3l_att(i,j,k,thread_id)
+
+                   duzdxl_att = xixl*tempz1l_att(i,j,k,thread_id) + etaxl*tempz2l_att(i,j,k,thread_id) + &
+                        gammaxl*tempz3l_att(i,j,k,thread_id)
+                   duzdyl_att = xiyl*tempz1l_att(i,j,k,thread_id) + etayl*tempz2l_att(i,j,k,thread_id) + &
+                        gammayl*tempz3l_att(i,j,k,thread_id)
+                   duzdzl_att = xizl*tempz1l_att(i,j,k,thread_id) + etazl*tempz2l_att(i,j,k,thread_id) + &
+                        gammazl*tempz3l_att(i,j,k,thread_id)
+
+                   ! precompute some sums to save CPU time
+                   duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
+                   duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
+                   duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
+
+                   ! compute deviatoric strain
+                   if( SIMULATION_TYPE == 3 ) &
+                        epsilon_trace_over_3(i,j,k,ispec) = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
+                   epsilondev_xx_loc(i,j,k) = duxdxl_att - epsilon_trace_over_3(i,j,k,ispec)
+                   epsilondev_yy_loc(i,j,k) = duydyl_att - epsilon_trace_over_3(i,j,k,ispec)
+                   epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
+                   epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
+                   epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
+                else
+                   ! computes deviatoric strain attenuation and/or for kernel calculations
+                   if (COMPUTE_AND_STORE_STRAIN) then
+                      templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+                      if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
+                      epsilondev_xx_loc(i,j,k) = duxdxl - templ
+                      epsilondev_yy_loc(i,j,k) = duydyl - templ
+                      epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
+                      epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
+                      epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
+                   endif
+                endif
+
+                kappal = kappastore(i,j,k,ispec)
+                mul = mustore(i,j,k,ispec)
+
+                ! attenuation
+                if(ATTENUATION) then
+                   ! use unrelaxed parameters if attenuation
+                   mul  = mul * one_minus_sum_beta(i,j,k,ispec)
+                endif
+
+                ! full anisotropic case, stress calculations
+                if(ANISOTROPY) then
+                   c11 = c11store(i,j,k,ispec)
+                   c12 = c12store(i,j,k,ispec)
+                   c13 = c13store(i,j,k,ispec)
+                   c14 = c14store(i,j,k,ispec)
+                   c15 = c15store(i,j,k,ispec)
+                   c16 = c16store(i,j,k,ispec)
+                   c22 = c22store(i,j,k,ispec)
+                   c23 = c23store(i,j,k,ispec)
+                   c24 = c24store(i,j,k,ispec)
+                   c25 = c25store(i,j,k,ispec)
+                   c26 = c26store(i,j,k,ispec)
+                   c33 = c33store(i,j,k,ispec)
+                   c34 = c34store(i,j,k,ispec)
+                   c35 = c35store(i,j,k,ispec)
+                   c36 = c36store(i,j,k,ispec)
+                   c44 = c44store(i,j,k,ispec)
+                   c45 = c45store(i,j,k,ispec)
+                   c46 = c46store(i,j,k,ispec)
+                   c55 = c55store(i,j,k,ispec)
+                   c56 = c56store(i,j,k,ispec)
+                   c66 = c66store(i,j,k,ispec)
+
+                   sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
+                        c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
+                   sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
+                        c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
+                   sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
+                        c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
+                   sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
+                        c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
+                   sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
+                        c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
+                   sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
+                        c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
+
+                else
+
+                   ! isotropic case
+                   lambdalplus2mul = kappal + FOUR_THIRDS * mul
+                   lambdal = lambdalplus2mul - 2.*mul
+
+                   ! compute stress sigma
+                   sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
+                   sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
+                   sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
+
+                   sigma_xy = mul*duxdyl_plus_duydxl
+                   sigma_xz = mul*duzdxl_plus_duxdzl
+                   sigma_yz = mul*duzdyl_plus_duydzl
+
+                endif ! ANISOTROPY
+
+                ! subtract memory variables if attenuation
+                if(ATTENUATION) then
+                   ! way 1
+                   !                do i_sls = 1,N_SLS
+                   !                  R_xx_val = R_xx(i,j,k,ispec,i_sls)
+                   !                  R_yy_val = R_yy(i,j,k,ispec,i_sls)
+                   !                  sigma_xx = sigma_xx - R_xx_val
+                   !                  sigma_yy = sigma_yy - R_yy_val
+                   !                  sigma_zz = sigma_zz + R_xx_val + R_yy_val
+                   !                  sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
+                   !                  sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
+                   !                  sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
+                   !                enddo
+
+                   ! way 2
+                   ! note: this should help compilers to pipeline the code and make better use of the cache;
+                   !          depending on compilers, it can further decrease the computation time by ~ 30%.
+                   !          by default, N_SLS = 3, therefore we take steps of 3
+                   if(imodulo_N_SLS >= 1) then
+                      do i_sls = 1,imodulo_N_SLS
+                         R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
+                         R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
+                         sigma_xx = sigma_xx - R_xx_val1
+                         sigma_yy = sigma_yy - R_yy_val1
+                         sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
+                         sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
+                         sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
+                         sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
+                      enddo
+                   endif
+
+                   if(N_SLS >= imodulo_N_SLS+1) then
+                      do i_sls = imodulo_N_SLS+1,N_SLS,3
+                         R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
+                         R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
+                         sigma_xx = sigma_xx - R_xx_val1
+                         sigma_yy = sigma_yy - R_yy_val1
+                         sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
+                         sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
+                         sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
+                         sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
+
+                         R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
+                         R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
+                         sigma_xx = sigma_xx - R_xx_val2
+                         sigma_yy = sigma_yy - R_yy_val2
+                         sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
+                         sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
+                         sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
+                         sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
+
+                         R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
+                         R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
+                         sigma_xx = sigma_xx - R_xx_val3
+                         sigma_yy = sigma_yy - R_yy_val3
+                         sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
+                         sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
+                         sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
+                         sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
+                      enddo
+                   endif
+
+
+                endif
+
+                ! form dot product with test vector, symmetric form
+                tempx1(i,j,k,thread_id) = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl)
+                tempy1(i,j,k,thread_id) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_yz*xizl)
+                tempz1(i,j,k,thread_id) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl)
+
+                tempx2(i,j,k,thread_id) = jacobianl * (sigma_xx*etaxl + sigma_xy*etayl + sigma_xz*etazl)
+                tempy2(i,j,k,thread_id) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_yz*etazl)
+                tempz2(i,j,k,thread_id) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl)
+
+                tempx3(i,j,k,thread_id) = jacobianl * (sigma_xx*gammaxl + sigma_xy*gammayl + sigma_xz*gammazl)
+                tempy3(i,j,k,thread_id) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_yz*gammazl)
+                tempz3(i,j,k,thread_id) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl)
+
+             enddo
+          enddo
+       enddo
+
+       ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+       ! for incompressible fluid flow, Cambridge University Press (2002),
+       ! pages 386 and 389 and Figure 8.3.1
+       ! call mxm_m1_m2_5points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
+       do j=1,m2
+          do i=1,m1
+             newtempx1(i,j,1,thread_id) = &
+                  hprimewgll_xxT(i,1)*tempx1(1,j,1,thread_id) + &
+                  hprimewgll_xxT(i,2)*tempx1(2,j,1,thread_id) + &
+                  hprimewgll_xxT(i,3)*tempx1(3,j,1,thread_id) + &
+                  hprimewgll_xxT(i,4)*tempx1(4,j,1,thread_id) + &
+                  hprimewgll_xxT(i,5)*tempx1(5,j,1,thread_id)
+             newtempy1(i,j,1,thread_id) = &
+                  hprimewgll_xxT(i,1)*tempy1(1,j,1,thread_id) + &
+                  hprimewgll_xxT(i,2)*tempy1(2,j,1,thread_id) + &
+                  hprimewgll_xxT(i,3)*tempy1(3,j,1,thread_id) + &
+                  hprimewgll_xxT(i,4)*tempy1(4,j,1,thread_id) + &
+                  hprimewgll_xxT(i,5)*tempy1(5,j,1,thread_id)
+             newtempz1(i,j,1,thread_id) = &
+                  hprimewgll_xxT(i,1)*tempz1(1,j,1,thread_id) + &
+                  hprimewgll_xxT(i,2)*tempz1(2,j,1,thread_id) + &
+                  hprimewgll_xxT(i,3)*tempz1(3,j,1,thread_id) + &
+                  hprimewgll_xxT(i,4)*tempz1(4,j,1,thread_id) + &
+                  hprimewgll_xxT(i,5)*tempz1(5,j,1,thread_id)
+          enddo
+       enddo
+
+       !   call mxm_m1_m1_5points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
+       !         hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
+       do i=1,m1
+          do j=1,m1
+             ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+             do k = 1,NGLLX
+                newtempx2(i,j,k,thread_id) = tempx2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
+                     tempx2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
+                     tempx2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
+                     tempx2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
+                     tempx2(i,5,k,thread_id)*hprimewgll_xx(5,j)
+                newtempy2(i,j,k,thread_id) = tempy2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
+                     tempy2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
+                     tempy2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
+                     tempy2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
+                     tempy2(i,5,k,thread_id)*hprimewgll_xx(5,j)
+                newtempz2(i,j,k,thread_id) = tempz2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
+                     tempz2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
+                     tempz2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
+                     tempz2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
+                     tempz2(i,5,k,thread_id)*hprimewgll_xx(5,j)
+             enddo
+          enddo
+       enddo
+
+       ! call mxm_m2_m1_5points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
+       do j=1,m1
+          do i=1,m2
+             newtempx3(i,1,j,thread_id) = &
+                  tempx3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
+                  tempx3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
+                  tempx3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
+                  tempx3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
+                  tempx3(i,1,5,thread_id)*hprimewgll_xx(5,j)
+             newtempy3(i,1,j,thread_id) = &
+                  tempy3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
+                  tempy3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
+                  tempy3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
+                  tempy3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
+                  tempy3(i,1,5,thread_id)*hprimewgll_xx(5,j)
+             newtempz3(i,1,j,thread_id) = &
+                  tempz3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
+                  tempz3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
+                  tempz3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
+                  tempz3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
+                  tempz3(i,1,5,thread_id)*hprimewgll_xx(5,j)
+          enddo
+       enddo
+
+       do k=1,NGLLZ
+          do j=1,NGLLY
+             do i=1,NGLLX
+
+                fac1 = wgllwgll_yz(j,k)
+                fac2 = wgllwgll_xz(i,k)
+                fac3 = wgllwgll_xy(i,j)
+
+                ! sum contributions from each element to the global mesh using indirect addressing
+                iglob = ibool(i,j,k,ispec)
+
+                ! accel_omp(1,iglob,thread_id) = accel_omp(1,iglob,thread_id)&
+                !      - fac1*newtempx1(i,j,k,thread_id) - fac2*newtempx2(i,j,k,thread_id)&
+                !      - fac3*newtempx3(i,j,k,thread_id)
+                ! accel_omp(2,iglob,thread_id) = accel_omp(2,iglob,thread_id)&
+                !      - fac1*newtempy1(i,j,k,thread_id) - fac2*newtempy2(i,j,k,thread_id)&
+                !      - fac3*newtempy3(i,j,k,thread_id)
+                ! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id)&
+                !      - fac1*newtempz1(i,j,k,thread_id) - fac2*newtempz2(i,j,k,thread_id)&
+                !      - fac3*newtempz3(i,j,k,thread_id)
+
+                ! Assembly of shared degrees of freedom fixed through mesh coloring
+                !! !$OMP ATOMIC
+                accel(1,iglob) = accel(1,iglob) &
+                                  - (fac1*newtempx1(i,j,k,thread_id) &
+                                   + fac2*newtempx2(i,j,k,thread_id) &
+                                   + fac3*newtempx3(i,j,k,thread_id))
+                !! !$OMP ATOMIC
+                accel(2,iglob) = accel(2,iglob) &
+                                  - (fac1*newtempy1(i,j,k,thread_id) &
+                                   + fac2*newtempy2(i,j,k,thread_id) &
+                                   + fac3*newtempy3(i,j,k,thread_id))
+                !! !$OMP ATOMIC
+                accel(3,iglob) = accel(3,iglob) &
+                                  - (fac1*newtempz1(i,j,k,thread_id) &
+                                   + fac2*newtempz2(i,j,k,thread_id) &
+                                   + fac3*newtempz3(i,j,k,thread_id))
+
+                ! accel(1,iglob) = accel(1,iglob) - &
+                ! (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
+                ! accel(2,iglob) = accel(2,iglob) - &
+                ! (fac1*newtempy1(i,j,k,thread_id) + fac2*newtempy2(i,j,k,thread_id) + fac3*newtempy3(i,j,k,thread_id))
+                ! accel(3,iglob) = accel(3,iglob) - &
+                ! (fac1*newtempz1(i,j,k,thread_id) + fac2*newtempz2(i,j,k,thread_id) + fac3*newtempz3(i,j,k,thread_id))
+
+                ! accel_omp(1,iglob,thread_id) = accel_omp(1,iglob,thread_id) - fac1*newtempx1(i,j,k,thread_id) - &
+                !                   fac2*newtempx2(i,j,k,thread_id) - fac3*newtempx3(i,j,k,thread_id)
+                ! accel_omp(2,iglob,thread_id) = accel_omp(2,iglob,thread_id) - fac1*newtempy1(i,j,k,thread_id) - &
+                !                   fac2*newtempy2(i,j,k,thread_id) - fac3*newtempy3(i,j,k,thread_id)
+                ! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id) - fac1*newtempz1(i,j,k,thread_id) - &
+                !      fac2*newtempz2(i,j,k,thread_id) - fac3*newtempz3(i,j,k,thread_id)
+
+                !  update memory variables based upon the Runge-Kutta scheme
+                if(ATTENUATION) then
+
+                   ! use Runge-Kutta scheme to march in time
+                   do i_sls = 1,N_SLS
+
+                      factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
+
+                      alphaval_loc = alphaval(i_sls)
+                      betaval_loc = betaval(i_sls)
+                      gammaval_loc = gammaval(i_sls)
+
+                      ! term in xx
+                      Sn   = factor_loc * epsilondev_xx(i,j,k,ispec)
+                      Snp1   = factor_loc * epsilondev_xx_loc(i,j,k)
+                      R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
+                           betaval_loc * Sn + gammaval_loc * Snp1
+                      ! term in yy
+                      Sn   = factor_loc * epsilondev_yy(i,j,k,ispec)
+                      Snp1   = factor_loc * epsilondev_yy_loc(i,j,k)
+                      R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
+                           betaval_loc * Sn + gammaval_loc * Snp1
+                      ! term in zz not computed since zero trace
+                      ! term in xy
+                      Sn   = factor_loc * epsilondev_xy(i,j,k,ispec)
+                      Snp1   = factor_loc * epsilondev_xy_loc(i,j,k)
+                      R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
+                           betaval_loc * Sn + gammaval_loc * Snp1
+                      ! term in xz
+                      Sn   = factor_loc * epsilondev_xz(i,j,k,ispec)
+                      Snp1   = factor_loc * epsilondev_xz_loc(i,j,k)
+                      R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
+                           betaval_loc * Sn + gammaval_loc * Snp1
+                      ! term in yz
+                      Sn   = factor_loc * epsilondev_yz(i,j,k,ispec)
+                      Snp1   = factor_loc * epsilondev_yz_loc(i,j,k)
+                      R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
+                           betaval_loc * Sn + gammaval_loc * Snp1
+
+                   enddo   ! end of loop on memory variables
+
+                endif  !  end attenuation
+
+             enddo
+          enddo
+       enddo
+
+       ! save deviatoric strain for Runge-Kutta scheme
+       if ( COMPUTE_AND_STORE_STRAIN ) then
+          epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
+          epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
+          epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
+          epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
+          epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
+       endif
+
+    enddo  ! spectral element loop
+    !$OMP END DO
+    !$OMP END PARALLEL
+
+    ! The elements are in order of color. First we do color 1 elements,
+    ! then color 2, etc. The ispec has to moved to start at the next
+    ! color.
+    estart = estart + num_elements
+
+  enddo ! loop over colors
+
+
+  ! "stop" timer
+  ! end_time = omp_get_wtime()
+
+  ! write(*,*) "Total Elapsed time: ", (end_time-start_time) , "seconds. (Threads=",NUM_THREADS,")"
+  ! write(*,*) "Accumulate Elapsed time: ", (accumulate_time_stop-accumulate_time_start) , "seconds"
+
+
+  ! These are now allocated at the beginning and never deallocated
+  ! because the program just finishes at the end.
+
+  ! deallocate(dummyx_loc)
+  ! deallocate(dummyy_loc)
+  ! deallocate(dummyz_loc)
+  ! deallocate(dummyx_loc_att)
+  ! deallocate(dummyy_loc_att)
+  ! deallocate(dummyz_loc_att)
+  ! deallocate(newtempx1)
+  ! deallocate(newtempx2)
+  ! deallocate(newtempx3)
+  ! deallocate(newtempy1)
+  ! deallocate(newtempy2)
+  ! deallocate(newtempy3)
+  ! deallocate(newtempz1)
+  ! deallocate(newtempz2)
+  ! deallocate(newtempz3)
+  ! deallocate(tempx1)
+  ! deallocate(tempx2)
+  ! deallocate(tempx3)
+  ! deallocate(tempy1)
+  ! deallocate(tempy2)
+  ! deallocate(tempy3)
+  ! deallocate(tempz1)
+  ! deallocate(tempz2)
+  ! deallocate(tempz3)
+  ! deallocate(tempx1_att)
+  ! deallocate(tempx2_att)
+  ! deallocate(tempx3_att)
+  ! deallocate(tempy1_att)
+  ! deallocate(tempy2_att)
+  ! deallocate(tempy3_att)
+  ! deallocate(tempz1_att)
+  ! deallocate(tempz2_att)
+  ! deallocate(tempz3_att)
+
+  ! accel(:,:) = accel_omp(:,:,1)
+
+  end subroutine compute_forces_elastic_Dev_openmp
+
+