[cig-commits] r21417 - in seismo/3D/SPECFEM3D/trunk/src/specfem3D: . older_not_maintained_partial_OpenMP_port
dkomati1 at geodynamics.org
dkomati1 at geodynamics.org
Thu Feb 28 02:27:30 PST 2013
Author: dkomati1
Date: 2013-02-28 02:27:30 -0800 (Thu, 28 Feb 2013)
New Revision: 21417
Added:
seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_not_maintained_partial_OpenMP_port/
seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_not_maintained_partial_OpenMP_port/older_not_maintained_compute_forces_viscoelastic_Dev_openmp.f90
Removed:
seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_not_maintained_compute_forces_viscoelastic_Dev_openmp.f90
Log:
moved src/specfem3D/older_not_maintained_compute_forces_viscoelastic_Dev_openmp.f90 to src/specfem3D/older_not_maintained_partial_OpenMP_port
Deleted: seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_not_maintained_compute_forces_viscoelastic_Dev_openmp.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_not_maintained_compute_forces_viscoelastic_Dev_openmp.f90 2013-02-28 06:40:09 UTC (rev 21416)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_not_maintained_compute_forces_viscoelastic_Dev_openmp.f90 2013-02-28 10:27:30 UTC (rev 21417)
@@ -1,984 +0,0 @@
-!=====================================================================
-!
-! S p e c f e m 3 D V e r s i o n 2 . 1
-! ---------------------------------------
-!
-! Main authors: Dimitri Komatitsch and Jeroen Tromp
-! Princeton University, USA and CNRS / INRIA / University of Pau
-! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
-! October 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! OpenMP Threaded variant by John Levesque, Max Rietmann and Olaf Schenk
-
-!! DK DK Jan 2013: beware, that OpenMP version is not maintained / supported and thus probably does not work
-
- subroutine compute_forces_viscoelastic_Dev_openmp(iphase ,NSPEC_AB,NGLOB_AB, &
- displ,veloc,accel, &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
- hprime_xx,hprime_xxT, &
- hprimewgll_xx,hprimewgll_xxT, &
- wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
- kappastore,mustore,jacobian,ibool, &
- ATTENUATION,deltat, &
- one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
- NSPEC_ATTENUATION_AB, &
- R_xx,R_yy,R_xy,R_xz,R_yz, &
- epsilondev_xx,epsilondev_yy,epsilondev_xy, &
- epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
- ANISOTROPY,NSPEC_ANISO, &
- c11store,c12store,c13store,c14store,c15store,c16store,&
- c22store,c23store,c24store,c25store,c26store,c33store,&
- c34store,c35store,c36store,c44store,c45store,c46store,&
- c55store,c56store,c66store, &
- SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
- NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
- is_moho_top,is_moho_bot, &
- dsdx_top,dsdx_bot, &
- ispec2D_moho_top,ispec2D_moho_bot, &
- num_phase_ispec_elastic,&
- phase_ispec_inner_elastic,&
- num_colors_outer_elastic,num_colors_inner_elastic)
-
-
-
- ! computes elastic tensor term
-
- use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
- N_SLS,SAVE_MOHO_MESH, &
- ONE_THIRD,FOUR_THIRDS,m1,m2
-
- ! Trying to pass these variables as subroutine arguments ran into
- ! problems, so we reference them from their module, making them
- ! accessible from this subroutine
- use specfem_par_elastic, only:dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3, &
- newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3, &
- tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3,num_elem_colors_elastic, &
- dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,tempx1_att,tempx2_att,tempx3_att, &
- tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
-
- use fault_solver_dynamic, only : Kelvin_Voigt_eta
-
- implicit none
-
- integer :: NSPEC_AB,NGLOB_AB
-
- ! displacement and acceleration
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,accel
-
-
- ! arrays with mesh parameters per slice
- integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
- xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
- kappastore,mustore,jacobian
-
- ! array with derivatives of Lagrange polynomials and precalculated products
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
- real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
-
- ! memory variables and standard linear solids for attenuation
- logical :: ATTENUATION
- logical :: COMPUTE_AND_STORE_STRAIN
- integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
- integer :: NSPEC_ATTENUATION_AB
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
- real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
- real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
- R_xx,R_yy,R_xy,R_xz,R_yz
-
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
- epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
- real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
-
- ! anisotropy
- logical :: ANISOTROPY
- integer :: NSPEC_ANISO
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
- c11store,c12store,c13store,c14store,c15store,c16store, &
- c22store,c23store,c24store,c25store,c26store,c33store, &
- c34store,c35store,c36store,c44store,c45store,c46store, &
- c55store,c56store,c66store
-
- integer :: iphase
- integer :: num_phase_ispec_elastic
- integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
-
- ! adjoint simulations
- integer :: SIMULATION_TYPE
- integer :: NSPEC_BOUN,NSPEC2D_MOHO
-
- ! moho kernel
- real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
- dsdx_top,dsdx_bot
- logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
- integer :: ispec2D_moho_top, ispec2D_moho_bot
-
- ! local attenuation parameters
- real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_xx_loc, &
- epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
- real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
- real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
- real(kind=CUSTOM_REAL) Sn,Snp1
- real(kind=CUSTOM_REAL) templ
-
- real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
- real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
-
- real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
- real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
-
- real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
-
- real(kind=CUSTOM_REAL) fac1,fac2,fac3
-
- real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
- real(kind=CUSTOM_REAL) kappal
-
- real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
- real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att;
- real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att;
-
- integer OMP_get_thread_num
- integer OMP_GET_MAX_THREADS
-
- ! timing
- !double precision omp_get_wtime
- !double precision start_time
- !double precision end_time
- !double precision accumulate_time_start
- !double precision accumulate_time_stop
-
- ! local anisotropy parameters
- real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
- c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
-
- integer i_SLS,imodulo_N_SLS
- integer ispec,iglob,ispec_p,num_elements
- integer i,j,k
- integer thread_id
- integer NUM_THREADS
- !integer omp_get_num_threads ! function
-
- ! coloring additions
- ! integer, dimension(:), allocatable :: num_elem_colors_elastic
- integer istart, estart, number_of_colors
- integer num_colors_outer_elastic, num_colors_inner_elastic
- integer icolor
-
- real(kind=CUSTOM_REAL) :: eta
-
- ! write(*,*) "num_elem_colors_elastic(1) = ",num_elem_colors_elastic(1)
- imodulo_N_SLS = mod(N_SLS,3)
-
- ! NUM_THREADS = 1
- NUM_THREADS = OMP_GET_MAX_THREADS()
-
-
- ! choses inner/outer elements
- if( iphase == 1 ) then
- number_of_colors = num_colors_outer_elastic
- istart = 1
- else
- number_of_colors = num_colors_inner_elastic + num_colors_outer_elastic
- istart = num_colors_outer_elastic+1
- ! istart = num_colors_outer_elastic
- endif
-
- ! "start" timer
- ! start_time = omp_get_wtime()
-
- ! The mesh coloring algorithm provides disjoint sets of elements that
- ! do not share degrees of freedom which is required for the assembly
- ! step at the "accel(iglob) += update" step. The coloring is
- ! implemented, such that the element and node indices are ordered by
- ! color. This requires then only to iterate through the elements in
- ! order, stopping to synchronize threads after all the elements in a
- ! color are finished.
- estart = 1
- do icolor = istart, number_of_colors
-
- !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(&
- !$OMP R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3,&
- !$OMP factor_loc,alphaval_loc,betaval_loc,gammaval_loc,&
- !$OMP Sn,Snp1,&
- !$OMP templ,&
- !$OMP xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl,&
- !$OMP duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl,&
- !$OMP duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl,&
- !$OMP duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl,&
- !$OMP sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,&
- !$OMP fac1,fac2,fac3,&
- !$OMP lambdal,mul,lambdalplus2mul,kappal,&
- !$OMP c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
- !$OMP c33,c34,c35,c36,c44,c45,c46,c55,c56,c66,&
- !$OMP i_SLS,&
- !$OMP ispec,iglob,ispec_p,&
- !$OMP i,j,k,&
- !$OMP thread_id)
-
- thread_id = OMP_get_thread_num()+1
-
- ! we retrive the subset of the total elements determined by the mesh
- ! coloring. This number changes as we iterate through the colors
- num_elements = num_elem_colors_elastic(icolor)
- !$OMP DO
- do ispec_p = estart,(estart-1)+num_elements
-
-
- ! returns element id from stored element list
- ispec = phase_ispec_inner_elastic(ispec_p,iphase)
-
- ! adjoint simulations: moho kernel
- if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
- if (is_moho_top(ispec)) then
- ispec2D_moho_top = ispec2D_moho_top + 1
- else if (is_moho_bot(ispec)) then
- ispec2D_moho_bot = ispec2D_moho_bot + 1
- endif
- endif ! adjoint
-
- ! Kelvin Voigt damping: artificial viscosity around dynamic faults
-
- ! stores displacment values in local array
- if (allocated(Kelvin_Voigt_eta)) then
- eta = Kelvin_Voigt_eta(ispec)
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
- dummyx_loc(i,j,k,thread_id) = displ(1,iglob) + eta*veloc(1,iglob)
- dummyy_loc(i,j,k,thread_id) = displ(2,iglob) + eta*veloc(2,iglob)
- dummyz_loc(i,j,k,thread_id) = displ(3,iglob) + eta*veloc(3,iglob)
- enddo
- enddo
- enddo
-
- else
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
- dummyx_loc(i,j,k,thread_id) = displ(1,iglob)
- dummyy_loc(i,j,k,thread_id) = displ(2,iglob)
- dummyz_loc(i,j,k,thread_id) = displ(3,iglob)
- enddo
- enddo
- enddo
- endif
-
- ! stores velocity values in local array
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- iglob = ibool(i,j,k,ispec)
- dummyx_loc_att(i,j,k,thread_id) = veloc(1,iglob)
- dummyy_loc_att(i,j,k,thread_id) = veloc(2,iglob)
- dummyz_loc_att(i,j,k,thread_id) = veloc(3,iglob)
- enddo
- enddo
- enddo
-
- ! subroutines adapted from Deville, Fischer and Mund, High-order methods
- ! for incompressible fluid flow, Cambridge University Press (2002),
- ! pages 386 and 389 and Figure 8.3.1
- ! call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
- do j=1,m2
- do i=1,m1
- tempx1(i,j,1,thread_id) = &
- hprime_xx(i,1)*dummyx_loc(1,j,1,thread_id) + &
- hprime_xx(i,2)*dummyx_loc(2,j,1,thread_id) + &
- hprime_xx(i,3)*dummyx_loc(3,j,1,thread_id) + &
- hprime_xx(i,4)*dummyx_loc(4,j,1,thread_id) + &
- hprime_xx(i,5)*dummyx_loc(5,j,1,thread_id)
- tempy1(i,j,1,thread_id) = &
- hprime_xx(i,1)*dummyy_loc(1,j,1,thread_id) + &
- hprime_xx(i,2)*dummyy_loc(2,j,1,thread_id) + &
- hprime_xx(i,3)*dummyy_loc(3,j,1,thread_id) + &
- hprime_xx(i,4)*dummyy_loc(4,j,1,thread_id) + &
- hprime_xx(i,5)*dummyy_loc(5,j,1,thread_id)
- tempz1(i,j,1,thread_id) = &
- hprime_xx(i,1)*dummyz_loc(1,j,1,thread_id) + &
- hprime_xx(i,2)*dummyz_loc(2,j,1,thread_id) + &
- hprime_xx(i,3)*dummyz_loc(3,j,1,thread_id) + &
- hprime_xx(i,4)*dummyz_loc(4,j,1,thread_id) + &
- hprime_xx(i,5)*dummyz_loc(5,j,1,thread_id)
- enddo
- enddo
-
- ! call mxm_m1_m1_5points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
- ! hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
- do j=1,m1
- do i=1,m1
- ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
- do k = 1,NGLLX
- tempx2(i,j,k,thread_id) = dummyx_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
- dummyx_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
- dummyx_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
- dummyx_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
- dummyx_loc(i,5,k,thread_id)*hprime_xxT(5,j)
- tempy2(i,j,k,thread_id) = dummyy_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
- dummyy_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
- dummyy_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
- dummyy_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
- dummyy_loc(i,5,k,thread_id)*hprime_xxT(5,j)
- tempz2(i,j,k,thread_id) = dummyz_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
- dummyz_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
- dummyz_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
- dummyz_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
- dummyz_loc(i,5,k,thread_id)*hprime_xxT(5,j)
- enddo
- enddo
- enddo
-
- ! call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
- do j=1,m1
- do i=1,m2
- tempx3(i,1,j,thread_id) = &
- dummyx_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
- dummyx_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
- dummyx_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
- dummyx_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
- dummyx_loc(i,1,5,thread_id)*hprime_xxT(5,j)
- tempy3(i,1,j,thread_id) = &
- dummyy_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
- dummyy_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
- dummyy_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
- dummyy_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
- dummyy_loc(i,1,5,thread_id)*hprime_xxT(5,j)
- tempz3(i,1,j,thread_id) = &
- dummyz_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
- dummyz_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
- dummyz_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
- dummyz_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
- dummyz_loc(i,1,5,thread_id)*hprime_xxT(5,j)
-
- enddo
- enddo
-
- if( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
- do j=1,m2
- do i=1,m1
- tempx1_att(i,j,1,thread_id) = tempx1(i,j,1,thread_id)
- tempy1_att(i,j,1,thread_id) = tempy1(i,j,1,thread_id)
- tempz1_att(i,j,1,thread_id) = tempz1(i,j,1,thread_id)
- enddo
- enddo
-
- do j=1,m1
- do i=1,m1
- do k=1,NGLLX
- tempx2_att(i,j,k,thread_id) = tempx2(i,j,k,thread_id)
- tempy2_att(i,j,k,thread_id) = tempy2(i,j,k,thread_id)
- tempz2_att(i,j,k,thread_id) = tempz2(i,j,k,thread_id)
- enddo
- enddo
- enddo
-
- do j=1,m1
- do i=1,m2
- tempx3_att(i,1,j,thread_id) = tempx3(i,1,j,thread_id)
- tempy3_att(i,1,j,thread_id) = tempy3(i,1,j,thread_id)
- tempz3_att(i,1,j,thread_id) = tempz3(i,1,j,thread_id)
- enddo
- enddo
-
- ! use first order Taylor expansion of displacement for local storage of stresses
- ! at this current time step, to fix attenuation in a consistent way
- do j=1,m2
- do i=1,m1
- tempx1l_att(i,j,1,thread_id) = tempx1l_att(i,j,1,thread_id) + &
- deltat*hprime_xx(i,1)*dummyx_loc_att(1,j,1,thread_id) + &
- deltat*hprime_xx(i,2)*dummyx_loc_att(2,j,1,thread_id) + &
- deltat*hprime_xx(i,3)*dummyx_loc_att(3,j,1,thread_id) + &
- deltat*hprime_xx(i,4)*dummyx_loc_att(4,j,1,thread_id) + &
- deltat*hprime_xx(i,5)*dummyx_loc_att(5,j,1,thread_id)
-
- tempy1l_att(i,j,1,thread_id) = tempy1l_att(i,j,1,thread_id) + &
- deltat*hprime_xx(i,1)*dummyy_loc_att(1,j,1,thread_id) + &
- deltat*hprime_xx(i,2)*dummyy_loc_att(2,j,1,thread_id) + &
- deltat*hprime_xx(i,3)*dummyy_loc_att(3,j,1,thread_id) + &
- deltat*hprime_xx(i,4)*dummyy_loc_att(4,j,1,thread_id) + &
- deltat*hprime_xx(i,5)*dummyy_loc_att(5,j,1,thread_id)
-
- tempz1l_att(i,j,1,thread_id) = tempz1l_att(i,j,1,thread_id) + &
- deltat*hprime_xx(i,1)*dummyz_loc_att(1,j,1,thread_id) + &
- deltat*hprime_xx(i,2)*dummyz_loc_att(2,j,1,thread_id) + &
- deltat*hprime_xx(i,3)*dummyz_loc_att(3,j,1,thread_id) + &
- deltat*hprime_xx(i,4)*dummyz_loc_att(4,j,1,thread_id) + &
- deltat*hprime_xx(i,5)*dummyz_loc_att(5,j,1,thread_id)
- enddo
- enddo
-
- do j=1,m1
- do i=1,m1
- do k=1,NGLLX
- tempx2l_att(i,j,k,thread_id) = tempx2l_att(i,j,k,thread_id) + &
- deltat*dummyx_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
- deltat*dummyx_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
- deltat*dummyx_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
- deltat*dummyx_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
- deltat*dummyx_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
-
- tempy2l_att(i,j,k,thread_id) = tempy2l_att(i,j,k,thread_id) + &
- deltat*dummyy_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
- deltat*dummyy_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
- deltat*dummyy_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
- deltat*dummyy_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
- deltat*dummyy_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
-
- tempz2l_att(i,j,k,thread_id) = tempz2l_att(i,j,k,thread_id) + &
- deltat*dummyz_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
- deltat*dummyz_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
- deltat*dummyz_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
- deltat*dummyz_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
- deltat*dummyz_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
- enddo
- enddo
- enddo
-
- do j=1,m1
- do i=1,m2
- tempx3l_att(i,1,j,thread_id) = tempx3l_att(i,1,j,thread_id) + &
- deltat*dummyx_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
- deltat*dummyx_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
- deltat*dummyx_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
- deltat*dummyx_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
- deltat*dummyx_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
-
- tempy3l_att(i,1,j,thread_id) = tempy3l_att(i,1,j,thread_id) + &
- deltat*dummyy_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
- deltat*dummyy_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
- deltat*dummyy_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
- deltat*dummyy_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
- deltat*dummyy_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
-
- tempz3l_att(i,1,j,thread_id) = tempz3l_att(i,1,j,thread_id) + &
- deltat*dummyz_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
- deltat*dummyz_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
- deltat*dummyz_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
- deltat*dummyz_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
- deltat*dummyz_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
- enddo
- endif
- endif
-
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- ! get derivatives of ux, uy and uz with respect to x, y and z
- xixl = xix(i,j,k,ispec)
- xiyl = xiy(i,j,k,ispec)
- xizl = xiz(i,j,k,ispec)
- etaxl = etax(i,j,k,ispec)
- etayl = etay(i,j,k,ispec)
- etazl = etaz(i,j,k,ispec)
- gammaxl = gammax(i,j,k,ispec)
- gammayl = gammay(i,j,k,ispec)
- gammazl = gammaz(i,j,k,ispec)
- jacobianl = jacobian(i,j,k,ispec)
-
- duxdxl = xixl*tempx1(i,j,k,thread_id) + etaxl*tempx2(i,j,k,thread_id) + gammaxl*tempx3(i,j,k,thread_id)
- duxdyl = xiyl*tempx1(i,j,k,thread_id) + etayl*tempx2(i,j,k,thread_id) + gammayl*tempx3(i,j,k,thread_id)
- duxdzl = xizl*tempx1(i,j,k,thread_id) + etazl*tempx2(i,j,k,thread_id) + gammazl*tempx3(i,j,k,thread_id)
-
- duydxl = xixl*tempy1(i,j,k,thread_id) + etaxl*tempy2(i,j,k,thread_id) + gammaxl*tempy3(i,j,k,thread_id)
- duydyl = xiyl*tempy1(i,j,k,thread_id) + etayl*tempy2(i,j,k,thread_id) + gammayl*tempy3(i,j,k,thread_id)
- duydzl = xizl*tempy1(i,j,k,thread_id) + etazl*tempy2(i,j,k,thread_id) + gammazl*tempy3(i,j,k,thread_id)
-
- duzdxl = xixl*tempz1(i,j,k,thread_id) + etaxl*tempz2(i,j,k,thread_id) + gammaxl*tempz3(i,j,k,thread_id)
- duzdyl = xiyl*tempz1(i,j,k,thread_id) + etayl*tempz2(i,j,k,thread_id) + gammayl*tempz3(i,j,k,thread_id)
- duzdzl = xizl*tempz1(i,j,k,thread_id) + etazl*tempz2(i,j,k,thread_id) + gammazl*tempz3(i,j,k,thread_id)
-
- ! save strain on the Moho boundary
- if (SAVE_MOHO_MESH ) then
- if (is_moho_top(ispec)) then
- dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
- dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
- dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
- dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
- dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
- dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
- dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
- dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
- dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
- else if (is_moho_bot(ispec)) then
- dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
- dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
- dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
- dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
- dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
- dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
- dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
- dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
- dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
- endif
- endif
-
- ! precompute some sums to save CPU time
- duxdxl_plus_duydyl = duxdxl + duydyl
- duxdxl_plus_duzdzl = duxdxl + duzdzl
- duydyl_plus_duzdzl = duydyl + duzdzl
- duxdyl_plus_duydxl = duxdyl + duydxl
- duzdxl_plus_duxdzl = duzdxl + duxdzl
- duzdyl_plus_duydzl = duzdyl + duydzl
-
- if( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
- ! temporary variables used for fixing attenuation in a consistent way
- duxdxl_att = xixl*tempx1l_att(i,j,k,thread_id) + etaxl*tempx2l_att(i,j,k,thread_id) + &
- gammaxl*tempx3l_att(i,j,k,thread_id)
- duxdyl_att = xiyl*tempx1l_att(i,j,k,thread_id) + etayl*tempx2l_att(i,j,k,thread_id) + &
- gammayl*tempx3l_att(i,j,k,thread_id)
- duxdzl_att = xizl*tempx1l_att(i,j,k,thread_id) + etazl*tempx2l_att(i,j,k,thread_id) + &
- gammazl*tempx3l_att(i,j,k,thread_id)
-
- duydxl_att = xixl*tempy1l_att(i,j,k,thread_id) + etaxl*tempy2l_att(i,j,k,thread_id) + &
- gammaxl*tempy3l_att(i,j,k,thread_id)
- duydyl_att = xiyl*tempy1l_att(i,j,k,thread_id) + etayl*tempy2l_att(i,j,k,thread_id) + &
- gammayl*tempy3l_att(i,j,k,thread_id)
- duydzl_att = xizl*tempy1l_att(i,j,k,thread_id) + etazl*tempy2l_att(i,j,k,thread_id) + &
- gammazl*tempy3l_att(i,j,k,thread_id)
-
- duzdxl_att = xixl*tempz1l_att(i,j,k,thread_id) + etaxl*tempz2l_att(i,j,k,thread_id) + &
- gammaxl*tempz3l_att(i,j,k,thread_id)
- duzdyl_att = xiyl*tempz1l_att(i,j,k,thread_id) + etayl*tempz2l_att(i,j,k,thread_id) + &
- gammayl*tempz3l_att(i,j,k,thread_id)
- duzdzl_att = xizl*tempz1l_att(i,j,k,thread_id) + etazl*tempz2l_att(i,j,k,thread_id) + &
- gammazl*tempz3l_att(i,j,k,thread_id)
-
- ! precompute some sums to save CPU time
- duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
- duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
- duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
-
- ! compute deviatoric strain
- if( SIMULATION_TYPE == 3 ) &
- epsilon_trace_over_3(i,j,k,ispec) = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
- epsilondev_xx_loc(i,j,k) = duxdxl_att - epsilon_trace_over_3(i,j,k,ispec)
- epsilondev_yy_loc(i,j,k) = duydyl_att - epsilon_trace_over_3(i,j,k,ispec)
- epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
- epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
- epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
- else
- ! computes deviatoric strain attenuation and/or for kernel calculations
- if (COMPUTE_AND_STORE_STRAIN) then
- templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
- if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
- epsilondev_xx_loc(i,j,k) = duxdxl - templ
- epsilondev_yy_loc(i,j,k) = duydyl - templ
- epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
- epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
- epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
- endif
- endif
-
- kappal = kappastore(i,j,k,ispec)
- mul = mustore(i,j,k,ispec)
-
- ! attenuation
- if(ATTENUATION) then
- ! use unrelaxed parameters if attenuation
- mul = mul * one_minus_sum_beta(i,j,k,ispec)
- endif
-
- ! full anisotropic case, stress calculations
- if(ANISOTROPY) then
- c11 = c11store(i,j,k,ispec)
- c12 = c12store(i,j,k,ispec)
- c13 = c13store(i,j,k,ispec)
- c14 = c14store(i,j,k,ispec)
- c15 = c15store(i,j,k,ispec)
- c16 = c16store(i,j,k,ispec)
- c22 = c22store(i,j,k,ispec)
- c23 = c23store(i,j,k,ispec)
- c24 = c24store(i,j,k,ispec)
- c25 = c25store(i,j,k,ispec)
- c26 = c26store(i,j,k,ispec)
- c33 = c33store(i,j,k,ispec)
- c34 = c34store(i,j,k,ispec)
- c35 = c35store(i,j,k,ispec)
- c36 = c36store(i,j,k,ispec)
- c44 = c44store(i,j,k,ispec)
- c45 = c45store(i,j,k,ispec)
- c46 = c46store(i,j,k,ispec)
- c55 = c55store(i,j,k,ispec)
- c56 = c56store(i,j,k,ispec)
- c66 = c66store(i,j,k,ispec)
-
- sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
- c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
- sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
- c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
- sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
- c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
- sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
- c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
- sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
- c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
- sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
- c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
-
- else
-
- ! isotropic case
- lambdalplus2mul = kappal + FOUR_THIRDS * mul
- lambdal = lambdalplus2mul - 2.*mul
-
- ! compute stress sigma
- sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
- sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
- sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
-
- sigma_xy = mul*duxdyl_plus_duydxl
- sigma_xz = mul*duzdxl_plus_duxdzl
- sigma_yz = mul*duzdyl_plus_duydzl
-
- endif ! ANISOTROPY
-
- ! subtract memory variables if attenuation
- if(ATTENUATION) then
- ! way 1
- ! do i_sls = 1,N_SLS
- ! R_xx_val = R_xx(i,j,k,ispec,i_sls)
- ! R_yy_val = R_yy(i,j,k,ispec,i_sls)
- ! sigma_xx = sigma_xx - R_xx_val
- ! sigma_yy = sigma_yy - R_yy_val
- ! sigma_zz = sigma_zz + R_xx_val + R_yy_val
- ! sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
- ! sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
- ! sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
- ! enddo
-
- ! way 2
- ! note: this should help compilers to pipeline the code and make better use of the cache;
- ! depending on compilers, it can further decrease the computation time by ~ 30%.
- ! by default, N_SLS = 3, therefore we take steps of 3
- if(imodulo_N_SLS >= 1) then
- do i_sls = 1,imodulo_N_SLS
- R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
- R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
- sigma_xx = sigma_xx - R_xx_val1
- sigma_yy = sigma_yy - R_yy_val1
- sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
- sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
- sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
- sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
- enddo
- endif
-
- if(N_SLS >= imodulo_N_SLS+1) then
- do i_sls = imodulo_N_SLS+1,N_SLS,3
- R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
- R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
- sigma_xx = sigma_xx - R_xx_val1
- sigma_yy = sigma_yy - R_yy_val1
- sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
- sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
- sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
- sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
-
- R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
- R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
- sigma_xx = sigma_xx - R_xx_val2
- sigma_yy = sigma_yy - R_yy_val2
- sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
- sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
- sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
- sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
-
- R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
- R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
- sigma_xx = sigma_xx - R_xx_val3
- sigma_yy = sigma_yy - R_yy_val3
- sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
- sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
- sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
- sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
- enddo
- endif
-
-
- endif
-
- ! form dot product with test vector, symmetric form
- tempx1(i,j,k,thread_id) = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl)
- tempy1(i,j,k,thread_id) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_yz*xizl)
- tempz1(i,j,k,thread_id) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl)
-
- tempx2(i,j,k,thread_id) = jacobianl * (sigma_xx*etaxl + sigma_xy*etayl + sigma_xz*etazl)
- tempy2(i,j,k,thread_id) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_yz*etazl)
- tempz2(i,j,k,thread_id) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl)
-
- tempx3(i,j,k,thread_id) = jacobianl * (sigma_xx*gammaxl + sigma_xy*gammayl + sigma_xz*gammazl)
- tempy3(i,j,k,thread_id) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_yz*gammazl)
- tempz3(i,j,k,thread_id) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl)
-
- enddo
- enddo
- enddo
-
- ! subroutines adapted from Deville, Fischer and Mund, High-order methods
- ! for incompressible fluid flow, Cambridge University Press (2002),
- ! pages 386 and 389 and Figure 8.3.1
- ! call mxm_m1_m2_5points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
- do j=1,m2
- do i=1,m1
- newtempx1(i,j,1,thread_id) = &
- hprimewgll_xxT(i,1)*tempx1(1,j,1,thread_id) + &
- hprimewgll_xxT(i,2)*tempx1(2,j,1,thread_id) + &
- hprimewgll_xxT(i,3)*tempx1(3,j,1,thread_id) + &
- hprimewgll_xxT(i,4)*tempx1(4,j,1,thread_id) + &
- hprimewgll_xxT(i,5)*tempx1(5,j,1,thread_id)
- newtempy1(i,j,1,thread_id) = &
- hprimewgll_xxT(i,1)*tempy1(1,j,1,thread_id) + &
- hprimewgll_xxT(i,2)*tempy1(2,j,1,thread_id) + &
- hprimewgll_xxT(i,3)*tempy1(3,j,1,thread_id) + &
- hprimewgll_xxT(i,4)*tempy1(4,j,1,thread_id) + &
- hprimewgll_xxT(i,5)*tempy1(5,j,1,thread_id)
- newtempz1(i,j,1,thread_id) = &
- hprimewgll_xxT(i,1)*tempz1(1,j,1,thread_id) + &
- hprimewgll_xxT(i,2)*tempz1(2,j,1,thread_id) + &
- hprimewgll_xxT(i,3)*tempz1(3,j,1,thread_id) + &
- hprimewgll_xxT(i,4)*tempz1(4,j,1,thread_id) + &
- hprimewgll_xxT(i,5)*tempz1(5,j,1,thread_id)
- enddo
- enddo
-
- ! call mxm_m1_m1_5points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
- ! hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
- do i=1,m1
- do j=1,m1
- ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
- do k = 1,NGLLX
- newtempx2(i,j,k,thread_id) = tempx2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
- tempx2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
- tempx2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
- tempx2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
- tempx2(i,5,k,thread_id)*hprimewgll_xx(5,j)
- newtempy2(i,j,k,thread_id) = tempy2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
- tempy2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
- tempy2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
- tempy2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
- tempy2(i,5,k,thread_id)*hprimewgll_xx(5,j)
- newtempz2(i,j,k,thread_id) = tempz2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
- tempz2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
- tempz2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
- tempz2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
- tempz2(i,5,k,thread_id)*hprimewgll_xx(5,j)
- enddo
- enddo
- enddo
-
- ! call mxm_m2_m1_5points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
- do j=1,m1
- do i=1,m2
- newtempx3(i,1,j,thread_id) = &
- tempx3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
- tempx3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
- tempx3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
- tempx3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
- tempx3(i,1,5,thread_id)*hprimewgll_xx(5,j)
- newtempy3(i,1,j,thread_id) = &
- tempy3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
- tempy3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
- tempy3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
- tempy3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
- tempy3(i,1,5,thread_id)*hprimewgll_xx(5,j)
- newtempz3(i,1,j,thread_id) = &
- tempz3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
- tempz3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
- tempz3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
- tempz3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
- tempz3(i,1,5,thread_id)*hprimewgll_xx(5,j)
- enddo
- enddo
-
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
-
- fac1 = wgllwgll_yz(j,k)
- fac2 = wgllwgll_xz(i,k)
- fac3 = wgllwgll_xy(i,j)
-
- ! sum contributions from each element to the global mesh using indirect addressing
- iglob = ibool(i,j,k,ispec)
-
- ! accel_omp(1,iglob,thread_id) = accel_omp(1,iglob,thread_id)&
- ! - fac1*newtempx1(i,j,k,thread_id) - fac2*newtempx2(i,j,k,thread_id)&
- ! - fac3*newtempx3(i,j,k,thread_id)
- ! accel_omp(2,iglob,thread_id) = accel_omp(2,iglob,thread_id)&
- ! - fac1*newtempy1(i,j,k,thread_id) - fac2*newtempy2(i,j,k,thread_id)&
- ! - fac3*newtempy3(i,j,k,thread_id)
- ! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id)&
- ! - fac1*newtempz1(i,j,k,thread_id) - fac2*newtempz2(i,j,k,thread_id)&
- ! - fac3*newtempz3(i,j,k,thread_id)
-
- ! Assembly of shared degrees of freedom fixed through mesh coloring
- !! !$OMP ATOMIC
- accel(1,iglob) = accel(1,iglob) &
- - (fac1*newtempx1(i,j,k,thread_id) &
- + fac2*newtempx2(i,j,k,thread_id) &
- + fac3*newtempx3(i,j,k,thread_id))
- !! !$OMP ATOMIC
- accel(2,iglob) = accel(2,iglob) &
- - (fac1*newtempy1(i,j,k,thread_id) &
- + fac2*newtempy2(i,j,k,thread_id) &
- + fac3*newtempy3(i,j,k,thread_id))
- !! !$OMP ATOMIC
- accel(3,iglob) = accel(3,iglob) &
- - (fac1*newtempz1(i,j,k,thread_id) &
- + fac2*newtempz2(i,j,k,thread_id) &
- + fac3*newtempz3(i,j,k,thread_id))
-
- ! accel(1,iglob) = accel(1,iglob) - &
- ! (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
- ! accel(2,iglob) = accel(2,iglob) - &
- ! (fac1*newtempy1(i,j,k,thread_id) + fac2*newtempy2(i,j,k,thread_id) + fac3*newtempy3(i,j,k,thread_id))
- ! accel(3,iglob) = accel(3,iglob) - &
- ! (fac1*newtempz1(i,j,k,thread_id) + fac2*newtempz2(i,j,k,thread_id) + fac3*newtempz3(i,j,k,thread_id))
-
- ! accel_omp(1,iglob,thread_id) = accel_omp(1,iglob,thread_id) - fac1*newtempx1(i,j,k,thread_id) - &
- ! fac2*newtempx2(i,j,k,thread_id) - fac3*newtempx3(i,j,k,thread_id)
- ! accel_omp(2,iglob,thread_id) = accel_omp(2,iglob,thread_id) - fac1*newtempy1(i,j,k,thread_id) - &
- ! fac2*newtempy2(i,j,k,thread_id) - fac3*newtempy3(i,j,k,thread_id)
- ! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id) - fac1*newtempz1(i,j,k,thread_id) - &
- ! fac2*newtempz2(i,j,k,thread_id) - fac3*newtempz3(i,j,k,thread_id)
-
- ! update memory variables based upon the Runge-Kutta scheme
- if(ATTENUATION) then
-
- ! use Runge-Kutta scheme to march in time
- do i_sls = 1,N_SLS
-
- factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
-
- alphaval_loc = alphaval(i_sls)
- betaval_loc = betaval(i_sls)
- gammaval_loc = gammaval(i_sls)
-
- ! term in xx
- Sn = factor_loc * epsilondev_xx(i,j,k,ispec)
- Snp1 = factor_loc * epsilondev_xx_loc(i,j,k)
- R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
- betaval_loc * Sn + gammaval_loc * Snp1
- ! term in yy
- Sn = factor_loc * epsilondev_yy(i,j,k,ispec)
- Snp1 = factor_loc * epsilondev_yy_loc(i,j,k)
- R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
- betaval_loc * Sn + gammaval_loc * Snp1
- ! term in zz not computed since zero trace
- ! term in xy
- Sn = factor_loc * epsilondev_xy(i,j,k,ispec)
- Snp1 = factor_loc * epsilondev_xy_loc(i,j,k)
- R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
- betaval_loc * Sn + gammaval_loc * Snp1
- ! term in xz
- Sn = factor_loc * epsilondev_xz(i,j,k,ispec)
- Snp1 = factor_loc * epsilondev_xz_loc(i,j,k)
- R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
- betaval_loc * Sn + gammaval_loc * Snp1
- ! term in yz
- Sn = factor_loc * epsilondev_yz(i,j,k,ispec)
- Snp1 = factor_loc * epsilondev_yz_loc(i,j,k)
- R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
- betaval_loc * Sn + gammaval_loc * Snp1
-
- enddo ! end of loop on memory variables
-
- endif ! end attenuation
-
- enddo
- enddo
- enddo
-
- ! save deviatoric strain for Runge-Kutta scheme
- if ( COMPUTE_AND_STORE_STRAIN ) then
- epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
- epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
- epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
- epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
- epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
- endif
-
- enddo ! spectral element loop
- !$OMP END DO
- !$OMP END PARALLEL
-
- ! The elements are in order of color. First we do color 1 elements,
- ! then color 2, etc. The ispec has to moved to start at the next
- ! color.
- estart = estart + num_elements
-
- enddo ! loop over colors
-
-
- ! "stop" timer
- ! end_time = omp_get_wtime()
-
- ! write(*,*) "Total Elapsed time: ", (end_time-start_time) , "seconds. (Threads=",NUM_THREADS,")"
- ! write(*,*) "Accumulate Elapsed time: ", (accumulate_time_stop-accumulate_time_start) , "seconds"
-
-
- ! These are now allocated at the beginning and never deallocated
- ! because the program just finishes at the end.
-
- ! deallocate(dummyx_loc)
- ! deallocate(dummyy_loc)
- ! deallocate(dummyz_loc)
- ! deallocate(dummyx_loc_att)
- ! deallocate(dummyy_loc_att)
- ! deallocate(dummyz_loc_att)
- ! deallocate(newtempx1)
- ! deallocate(newtempx2)
- ! deallocate(newtempx3)
- ! deallocate(newtempy1)
- ! deallocate(newtempy2)
- ! deallocate(newtempy3)
- ! deallocate(newtempz1)
- ! deallocate(newtempz2)
- ! deallocate(newtempz3)
- ! deallocate(tempx1)
- ! deallocate(tempx2)
- ! deallocate(tempx3)
- ! deallocate(tempy1)
- ! deallocate(tempy2)
- ! deallocate(tempy3)
- ! deallocate(tempz1)
- ! deallocate(tempz2)
- ! deallocate(tempz3)
- ! deallocate(tempx1_att)
- ! deallocate(tempx2_att)
- ! deallocate(tempx3_att)
- ! deallocate(tempy1_att)
- ! deallocate(tempy2_att)
- ! deallocate(tempy3_att)
- ! deallocate(tempz1_att)
- ! deallocate(tempz2_att)
- ! deallocate(tempz3_att)
-
- ! accel(:,:) = accel_omp(:,:,1)
-
- end subroutine compute_forces_viscoelastic_Dev_openmp
-
-
Copied: seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_not_maintained_partial_OpenMP_port/older_not_maintained_compute_forces_viscoelastic_Dev_openmp.f90 (from rev 21416, seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_not_maintained_compute_forces_viscoelastic_Dev_openmp.f90)
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_not_maintained_partial_OpenMP_port/older_not_maintained_compute_forces_viscoelastic_Dev_openmp.f90 (rev 0)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/older_not_maintained_partial_OpenMP_port/older_not_maintained_compute_forces_viscoelastic_Dev_openmp.f90 2013-02-28 10:27:30 UTC (rev 21417)
@@ -0,0 +1,984 @@
+!=====================================================================
+!
+! S p e c f e m 3 D V e r s i o n 2 . 1
+! ---------------------------------------
+!
+! Main authors: Dimitri Komatitsch and Jeroen Tromp
+! Princeton University, USA and CNRS / INRIA / University of Pau
+! (c) Princeton University / California Institute of Technology and CNRS / INRIA / University of Pau
+! October 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! OpenMP Threaded variant by John Levesque, Max Rietmann and Olaf Schenk
+
+!! DK DK Jan 2013: beware, that OpenMP version is not maintained / supported and thus probably does not work
+
+ subroutine compute_forces_viscoelastic_Dev_openmp(iphase ,NSPEC_AB,NGLOB_AB, &
+ displ,veloc,accel, &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+ hprime_xx,hprime_xxT, &
+ hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
+ kappastore,mustore,jacobian,ibool, &
+ ATTENUATION,deltat, &
+ one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
+ NSPEC_ATTENUATION_AB, &
+ R_xx,R_yy,R_xy,R_xz,R_yz, &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+ epsilondev_xz,epsilondev_yz,epsilon_trace_over_3, &
+ ANISOTROPY,NSPEC_ANISO, &
+ c11store,c12store,c13store,c14store,c15store,c16store,&
+ c22store,c23store,c24store,c25store,c26store,c33store,&
+ c34store,c35store,c36store,c44store,c45store,c46store,&
+ c55store,c56store,c66store, &
+ SIMULATION_TYPE,COMPUTE_AND_STORE_STRAIN,NSPEC_STRAIN_ONLY, &
+ NSPEC_BOUN,NSPEC2D_MOHO,NSPEC_ADJOINT, &
+ is_moho_top,is_moho_bot, &
+ dsdx_top,dsdx_bot, &
+ ispec2D_moho_top,ispec2D_moho_bot, &
+ num_phase_ispec_elastic,&
+ phase_ispec_inner_elastic,&
+ num_colors_outer_elastic,num_colors_inner_elastic)
+
+
+
+ ! computes elastic tensor term
+
+ use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NDIM, &
+ N_SLS,SAVE_MOHO_MESH, &
+ ONE_THIRD,FOUR_THIRDS,m1,m2
+
+ ! Trying to pass these variables as subroutine arguments ran into
+ ! problems, so we reference them from their module, making them
+ ! accessible from this subroutine
+ use specfem_par_elastic, only:dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3, &
+ newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3, &
+ tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3,num_elem_colors_elastic, &
+ dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,tempx1_att,tempx2_att,tempx3_att, &
+ tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
+
+ use fault_solver_dynamic, only : Kelvin_Voigt_eta
+
+ implicit none
+
+ integer :: NSPEC_AB,NGLOB_AB
+
+ ! displacement and acceleration
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: displ,accel
+
+
+ ! arrays with mesh parameters per slice
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: ibool
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+ xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+ kappastore,mustore,jacobian
+
+ ! array with derivatives of Lagrange polynomials and precalculated products
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+ real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+ ! memory variables and standard linear solids for attenuation
+ logical :: ATTENUATION
+ logical :: COMPUTE_AND_STORE_STRAIN
+ integer :: NSPEC_STRAIN_ONLY, NSPEC_ADJOINT
+ integer :: NSPEC_ATTENUATION_AB
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: one_minus_sum_beta
+ real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB) :: factor_common
+ real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ATTENUATION_AB,N_SLS) :: &
+ R_xx,R_yy,R_xy,R_xz,R_yz
+
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_STRAIN_ONLY) :: &
+ epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
+ real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ADJOINT) :: epsilon_trace_over_3
+
+ ! anisotropy
+ logical :: ANISOTROPY
+ integer :: NSPEC_ANISO
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_ANISO) :: &
+ c11store,c12store,c13store,c14store,c15store,c16store, &
+ c22store,c23store,c24store,c25store,c26store,c33store, &
+ c34store,c35store,c36store,c44store,c45store,c46store, &
+ c55store,c56store,c66store
+
+ integer :: iphase
+ integer :: num_phase_ispec_elastic
+ integer, dimension(num_phase_ispec_elastic,2) :: phase_ispec_inner_elastic
+
+ ! adjoint simulations
+ integer :: SIMULATION_TYPE
+ integer :: NSPEC_BOUN,NSPEC2D_MOHO
+
+ ! moho kernel
+ real(kind=CUSTOM_REAL),dimension(NDIM,NDIM,NGLLX,NGLLY,NGLLZ,NSPEC2D_MOHO):: &
+ dsdx_top,dsdx_bot
+ logical,dimension(NSPEC_BOUN) :: is_moho_top,is_moho_bot
+ integer :: ispec2D_moho_top, ispec2D_moho_bot
+
+ ! local attenuation parameters
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: epsilondev_xx_loc, &
+ epsilondev_yy_loc, epsilondev_xy_loc, epsilondev_xz_loc, epsilondev_yz_loc
+ real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
+ real(kind=CUSTOM_REAL) factor_loc,alphaval_loc,betaval_loc,gammaval_loc
+ real(kind=CUSTOM_REAL) Sn,Snp1
+ real(kind=CUSTOM_REAL) templ
+
+ real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+ real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
+
+ real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
+ real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+
+ real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
+
+ real(kind=CUSTOM_REAL) fac1,fac2,fac3
+
+ real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
+ real(kind=CUSTOM_REAL) kappal
+
+ real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
+ real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att;
+ real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att;
+
+ integer OMP_get_thread_num
+ integer OMP_GET_MAX_THREADS
+
+ ! timing
+ !double precision omp_get_wtime
+ !double precision start_time
+ !double precision end_time
+ !double precision accumulate_time_start
+ !double precision accumulate_time_stop
+
+ ! local anisotropy parameters
+ real(kind=CUSTOM_REAL) c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
+ c33,c34,c35,c36,c44,c45,c46,c55,c56,c66
+
+ integer i_SLS,imodulo_N_SLS
+ integer ispec,iglob,ispec_p,num_elements
+ integer i,j,k
+ integer thread_id
+ integer NUM_THREADS
+ !integer omp_get_num_threads ! function
+
+ ! coloring additions
+ ! integer, dimension(:), allocatable :: num_elem_colors_elastic
+ integer istart, estart, number_of_colors
+ integer num_colors_outer_elastic, num_colors_inner_elastic
+ integer icolor
+
+ real(kind=CUSTOM_REAL) :: eta
+
+ ! write(*,*) "num_elem_colors_elastic(1) = ",num_elem_colors_elastic(1)
+ imodulo_N_SLS = mod(N_SLS,3)
+
+ ! NUM_THREADS = 1
+ NUM_THREADS = OMP_GET_MAX_THREADS()
+
+
+ ! choses inner/outer elements
+ if( iphase == 1 ) then
+ number_of_colors = num_colors_outer_elastic
+ istart = 1
+ else
+ number_of_colors = num_colors_inner_elastic + num_colors_outer_elastic
+ istart = num_colors_outer_elastic+1
+ ! istart = num_colors_outer_elastic
+ endif
+
+ ! "start" timer
+ ! start_time = omp_get_wtime()
+
+ ! The mesh coloring algorithm provides disjoint sets of elements that
+ ! do not share degrees of freedom which is required for the assembly
+ ! step at the "accel(iglob) += update" step. The coloring is
+ ! implemented, such that the element and node indices are ordered by
+ ! color. This requires then only to iterate through the elements in
+ ! order, stopping to synchronize threads after all the elements in a
+ ! color are finished.
+ estart = 1
+ do icolor = istart, number_of_colors
+
+ !$OMP PARALLEL DEFAULT(SHARED) PRIVATE(&
+ !$OMP R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3,&
+ !$OMP factor_loc,alphaval_loc,betaval_loc,gammaval_loc,&
+ !$OMP Sn,Snp1,&
+ !$OMP templ,&
+ !$OMP xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl,&
+ !$OMP duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl,&
+ !$OMP duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl,&
+ !$OMP duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl,&
+ !$OMP sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz,&
+ !$OMP fac1,fac2,fac3,&
+ !$OMP lambdal,mul,lambdalplus2mul,kappal,&
+ !$OMP c11,c12,c13,c14,c15,c16,c22,c23,c24,c25,c26,&
+ !$OMP c33,c34,c35,c36,c44,c45,c46,c55,c56,c66,&
+ !$OMP i_SLS,&
+ !$OMP ispec,iglob,ispec_p,&
+ !$OMP i,j,k,&
+ !$OMP thread_id)
+
+ thread_id = OMP_get_thread_num()+1
+
+ ! we retrive the subset of the total elements determined by the mesh
+ ! coloring. This number changes as we iterate through the colors
+ num_elements = num_elem_colors_elastic(icolor)
+ !$OMP DO
+ do ispec_p = estart,(estart-1)+num_elements
+
+
+ ! returns element id from stored element list
+ ispec = phase_ispec_inner_elastic(ispec_p,iphase)
+
+ ! adjoint simulations: moho kernel
+ if( SIMULATION_TYPE == 3 .and. SAVE_MOHO_MESH ) then
+ if (is_moho_top(ispec)) then
+ ispec2D_moho_top = ispec2D_moho_top + 1
+ else if (is_moho_bot(ispec)) then
+ ispec2D_moho_bot = ispec2D_moho_bot + 1
+ endif
+ endif ! adjoint
+
+ ! Kelvin Voigt damping: artificial viscosity around dynamic faults
+
+ ! stores displacment values in local array
+ if (allocated(Kelvin_Voigt_eta)) then
+ eta = Kelvin_Voigt_eta(ispec)
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+ dummyx_loc(i,j,k,thread_id) = displ(1,iglob) + eta*veloc(1,iglob)
+ dummyy_loc(i,j,k,thread_id) = displ(2,iglob) + eta*veloc(2,iglob)
+ dummyz_loc(i,j,k,thread_id) = displ(3,iglob) + eta*veloc(3,iglob)
+ enddo
+ enddo
+ enddo
+
+ else
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+ dummyx_loc(i,j,k,thread_id) = displ(1,iglob)
+ dummyy_loc(i,j,k,thread_id) = displ(2,iglob)
+ dummyz_loc(i,j,k,thread_id) = displ(3,iglob)
+ enddo
+ enddo
+ enddo
+ endif
+
+ ! stores velocity values in local array
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ iglob = ibool(i,j,k,ispec)
+ dummyx_loc_att(i,j,k,thread_id) = veloc(1,iglob)
+ dummyy_loc_att(i,j,k,thread_id) = veloc(2,iglob)
+ dummyz_loc_att(i,j,k,thread_id) = veloc(3,iglob)
+ enddo
+ enddo
+ enddo
+
+ ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+ ! for incompressible fluid flow, Cambridge University Press (2002),
+ ! pages 386 and 389 and Figure 8.3.1
+ ! call mxm_m1_m2_5points(hprime_xx,dummyx_loc,dummyy_loc,dummyz_loc,tempx1,tempy1,tempz1)
+ do j=1,m2
+ do i=1,m1
+ tempx1(i,j,1,thread_id) = &
+ hprime_xx(i,1)*dummyx_loc(1,j,1,thread_id) + &
+ hprime_xx(i,2)*dummyx_loc(2,j,1,thread_id) + &
+ hprime_xx(i,3)*dummyx_loc(3,j,1,thread_id) + &
+ hprime_xx(i,4)*dummyx_loc(4,j,1,thread_id) + &
+ hprime_xx(i,5)*dummyx_loc(5,j,1,thread_id)
+ tempy1(i,j,1,thread_id) = &
+ hprime_xx(i,1)*dummyy_loc(1,j,1,thread_id) + &
+ hprime_xx(i,2)*dummyy_loc(2,j,1,thread_id) + &
+ hprime_xx(i,3)*dummyy_loc(3,j,1,thread_id) + &
+ hprime_xx(i,4)*dummyy_loc(4,j,1,thread_id) + &
+ hprime_xx(i,5)*dummyy_loc(5,j,1,thread_id)
+ tempz1(i,j,1,thread_id) = &
+ hprime_xx(i,1)*dummyz_loc(1,j,1,thread_id) + &
+ hprime_xx(i,2)*dummyz_loc(2,j,1,thread_id) + &
+ hprime_xx(i,3)*dummyz_loc(3,j,1,thread_id) + &
+ hprime_xx(i,4)*dummyz_loc(4,j,1,thread_id) + &
+ hprime_xx(i,5)*dummyz_loc(5,j,1,thread_id)
+ enddo
+ enddo
+
+ ! call mxm_m1_m1_5points(dummyx_loc(1,1,k),dummyy_loc(1,1,k),dummyz_loc(1,1,k), &
+ ! hprime_xxT,tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k))
+ do j=1,m1
+ do i=1,m1
+ ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+ do k = 1,NGLLX
+ tempx2(i,j,k,thread_id) = dummyx_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
+ dummyx_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
+ dummyx_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
+ dummyx_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
+ dummyx_loc(i,5,k,thread_id)*hprime_xxT(5,j)
+ tempy2(i,j,k,thread_id) = dummyy_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
+ dummyy_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
+ dummyy_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
+ dummyy_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
+ dummyy_loc(i,5,k,thread_id)*hprime_xxT(5,j)
+ tempz2(i,j,k,thread_id) = dummyz_loc(i,1,k,thread_id)*hprime_xxT(1,j) + &
+ dummyz_loc(i,2,k,thread_id)*hprime_xxT(2,j) + &
+ dummyz_loc(i,3,k,thread_id)*hprime_xxT(3,j) + &
+ dummyz_loc(i,4,k,thread_id)*hprime_xxT(4,j) + &
+ dummyz_loc(i,5,k,thread_id)*hprime_xxT(5,j)
+ enddo
+ enddo
+ enddo
+
+ ! call mxm_m2_m1_5points(dummyx_loc,dummyy_loc,dummyz_loc,tempx3,tempy3,tempz3)
+ do j=1,m1
+ do i=1,m2
+ tempx3(i,1,j,thread_id) = &
+ dummyx_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
+ dummyx_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
+ dummyx_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
+ dummyx_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
+ dummyx_loc(i,1,5,thread_id)*hprime_xxT(5,j)
+ tempy3(i,1,j,thread_id) = &
+ dummyy_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
+ dummyy_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
+ dummyy_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
+ dummyy_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
+ dummyy_loc(i,1,5,thread_id)*hprime_xxT(5,j)
+ tempz3(i,1,j,thread_id) = &
+ dummyz_loc(i,1,1,thread_id)*hprime_xxT(1,j) + &
+ dummyz_loc(i,1,2,thread_id)*hprime_xxT(2,j) + &
+ dummyz_loc(i,1,3,thread_id)*hprime_xxT(3,j) + &
+ dummyz_loc(i,1,4,thread_id)*hprime_xxT(4,j) + &
+ dummyz_loc(i,1,5,thread_id)*hprime_xxT(5,j)
+
+ enddo
+ enddo
+
+ if( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
+ do j=1,m2
+ do i=1,m1
+ tempx1_att(i,j,1,thread_id) = tempx1(i,j,1,thread_id)
+ tempy1_att(i,j,1,thread_id) = tempy1(i,j,1,thread_id)
+ tempz1_att(i,j,1,thread_id) = tempz1(i,j,1,thread_id)
+ enddo
+ enddo
+
+ do j=1,m1
+ do i=1,m1
+ do k=1,NGLLX
+ tempx2_att(i,j,k,thread_id) = tempx2(i,j,k,thread_id)
+ tempy2_att(i,j,k,thread_id) = tempy2(i,j,k,thread_id)
+ tempz2_att(i,j,k,thread_id) = tempz2(i,j,k,thread_id)
+ enddo
+ enddo
+ enddo
+
+ do j=1,m1
+ do i=1,m2
+ tempx3_att(i,1,j,thread_id) = tempx3(i,1,j,thread_id)
+ tempy3_att(i,1,j,thread_id) = tempy3(i,1,j,thread_id)
+ tempz3_att(i,1,j,thread_id) = tempz3(i,1,j,thread_id)
+ enddo
+ enddo
+
+ ! use first order Taylor expansion of displacement for local storage of stresses
+ ! at this current time step, to fix attenuation in a consistent way
+ do j=1,m2
+ do i=1,m1
+ tempx1l_att(i,j,1,thread_id) = tempx1l_att(i,j,1,thread_id) + &
+ deltat*hprime_xx(i,1)*dummyx_loc_att(1,j,1,thread_id) + &
+ deltat*hprime_xx(i,2)*dummyx_loc_att(2,j,1,thread_id) + &
+ deltat*hprime_xx(i,3)*dummyx_loc_att(3,j,1,thread_id) + &
+ deltat*hprime_xx(i,4)*dummyx_loc_att(4,j,1,thread_id) + &
+ deltat*hprime_xx(i,5)*dummyx_loc_att(5,j,1,thread_id)
+
+ tempy1l_att(i,j,1,thread_id) = tempy1l_att(i,j,1,thread_id) + &
+ deltat*hprime_xx(i,1)*dummyy_loc_att(1,j,1,thread_id) + &
+ deltat*hprime_xx(i,2)*dummyy_loc_att(2,j,1,thread_id) + &
+ deltat*hprime_xx(i,3)*dummyy_loc_att(3,j,1,thread_id) + &
+ deltat*hprime_xx(i,4)*dummyy_loc_att(4,j,1,thread_id) + &
+ deltat*hprime_xx(i,5)*dummyy_loc_att(5,j,1,thread_id)
+
+ tempz1l_att(i,j,1,thread_id) = tempz1l_att(i,j,1,thread_id) + &
+ deltat*hprime_xx(i,1)*dummyz_loc_att(1,j,1,thread_id) + &
+ deltat*hprime_xx(i,2)*dummyz_loc_att(2,j,1,thread_id) + &
+ deltat*hprime_xx(i,3)*dummyz_loc_att(3,j,1,thread_id) + &
+ deltat*hprime_xx(i,4)*dummyz_loc_att(4,j,1,thread_id) + &
+ deltat*hprime_xx(i,5)*dummyz_loc_att(5,j,1,thread_id)
+ enddo
+ enddo
+
+ do j=1,m1
+ do i=1,m1
+ do k=1,NGLLX
+ tempx2l_att(i,j,k,thread_id) = tempx2l_att(i,j,k,thread_id) + &
+ deltat*dummyx_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
+ deltat*dummyx_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
+ deltat*dummyx_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
+ deltat*dummyx_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
+ deltat*dummyx_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
+
+ tempy2l_att(i,j,k,thread_id) = tempy2l_att(i,j,k,thread_id) + &
+ deltat*dummyy_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
+ deltat*dummyy_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
+ deltat*dummyy_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
+ deltat*dummyy_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
+ deltat*dummyy_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
+
+ tempz2l_att(i,j,k,thread_id) = tempz2l_att(i,j,k,thread_id) + &
+ deltat*dummyz_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
+ deltat*dummyz_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
+ deltat*dummyz_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
+ deltat*dummyz_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
+ deltat*dummyz_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
+ enddo
+ enddo
+ enddo
+
+ do j=1,m1
+ do i=1,m2
+ tempx3l_att(i,1,j,thread_id) = tempx3l_att(i,1,j,thread_id) + &
+ deltat*dummyx_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
+ deltat*dummyx_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
+ deltat*dummyx_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
+ deltat*dummyx_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
+ deltat*dummyx_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
+
+ tempy3l_att(i,1,j,thread_id) = tempy3l_att(i,1,j,thread_id) + &
+ deltat*dummyy_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
+ deltat*dummyy_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
+ deltat*dummyy_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
+ deltat*dummyy_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
+ deltat*dummyy_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
+
+ tempz3l_att(i,1,j,thread_id) = tempz3l_att(i,1,j,thread_id) + &
+ deltat*dummyz_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
+ deltat*dummyz_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
+ deltat*dummyz_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
+ deltat*dummyz_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
+ deltat*dummyz_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
+ enddo
+ endif
+ endif
+
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ ! get derivatives of ux, uy and uz with respect to x, y and z
+ xixl = xix(i,j,k,ispec)
+ xiyl = xiy(i,j,k,ispec)
+ xizl = xiz(i,j,k,ispec)
+ etaxl = etax(i,j,k,ispec)
+ etayl = etay(i,j,k,ispec)
+ etazl = etaz(i,j,k,ispec)
+ gammaxl = gammax(i,j,k,ispec)
+ gammayl = gammay(i,j,k,ispec)
+ gammazl = gammaz(i,j,k,ispec)
+ jacobianl = jacobian(i,j,k,ispec)
+
+ duxdxl = xixl*tempx1(i,j,k,thread_id) + etaxl*tempx2(i,j,k,thread_id) + gammaxl*tempx3(i,j,k,thread_id)
+ duxdyl = xiyl*tempx1(i,j,k,thread_id) + etayl*tempx2(i,j,k,thread_id) + gammayl*tempx3(i,j,k,thread_id)
+ duxdzl = xizl*tempx1(i,j,k,thread_id) + etazl*tempx2(i,j,k,thread_id) + gammazl*tempx3(i,j,k,thread_id)
+
+ duydxl = xixl*tempy1(i,j,k,thread_id) + etaxl*tempy2(i,j,k,thread_id) + gammaxl*tempy3(i,j,k,thread_id)
+ duydyl = xiyl*tempy1(i,j,k,thread_id) + etayl*tempy2(i,j,k,thread_id) + gammayl*tempy3(i,j,k,thread_id)
+ duydzl = xizl*tempy1(i,j,k,thread_id) + etazl*tempy2(i,j,k,thread_id) + gammazl*tempy3(i,j,k,thread_id)
+
+ duzdxl = xixl*tempz1(i,j,k,thread_id) + etaxl*tempz2(i,j,k,thread_id) + gammaxl*tempz3(i,j,k,thread_id)
+ duzdyl = xiyl*tempz1(i,j,k,thread_id) + etayl*tempz2(i,j,k,thread_id) + gammayl*tempz3(i,j,k,thread_id)
+ duzdzl = xizl*tempz1(i,j,k,thread_id) + etazl*tempz2(i,j,k,thread_id) + gammazl*tempz3(i,j,k,thread_id)
+
+ ! save strain on the Moho boundary
+ if (SAVE_MOHO_MESH ) then
+ if (is_moho_top(ispec)) then
+ dsdx_top(1,1,i,j,k,ispec2D_moho_top) = duxdxl
+ dsdx_top(1,2,i,j,k,ispec2D_moho_top) = duxdyl
+ dsdx_top(1,3,i,j,k,ispec2D_moho_top) = duxdzl
+ dsdx_top(2,1,i,j,k,ispec2D_moho_top) = duydxl
+ dsdx_top(2,2,i,j,k,ispec2D_moho_top) = duydyl
+ dsdx_top(2,3,i,j,k,ispec2D_moho_top) = duydzl
+ dsdx_top(3,1,i,j,k,ispec2D_moho_top) = duzdxl
+ dsdx_top(3,2,i,j,k,ispec2D_moho_top) = duzdyl
+ dsdx_top(3,3,i,j,k,ispec2D_moho_top) = duzdzl
+ else if (is_moho_bot(ispec)) then
+ dsdx_bot(1,1,i,j,k,ispec2D_moho_bot) = duxdxl
+ dsdx_bot(1,2,i,j,k,ispec2D_moho_bot) = duxdyl
+ dsdx_bot(1,3,i,j,k,ispec2D_moho_bot) = duxdzl
+ dsdx_bot(2,1,i,j,k,ispec2D_moho_bot) = duydxl
+ dsdx_bot(2,2,i,j,k,ispec2D_moho_bot) = duydyl
+ dsdx_bot(2,3,i,j,k,ispec2D_moho_bot) = duydzl
+ dsdx_bot(3,1,i,j,k,ispec2D_moho_bot) = duzdxl
+ dsdx_bot(3,2,i,j,k,ispec2D_moho_bot) = duzdyl
+ dsdx_bot(3,3,i,j,k,ispec2D_moho_bot) = duzdzl
+ endif
+ endif
+
+ ! precompute some sums to save CPU time
+ duxdxl_plus_duydyl = duxdxl + duydyl
+ duxdxl_plus_duzdzl = duxdxl + duzdzl
+ duydyl_plus_duzdzl = duydyl + duzdzl
+ duxdyl_plus_duydxl = duxdyl + duydxl
+ duzdxl_plus_duxdzl = duzdxl + duxdzl
+ duzdyl_plus_duydzl = duzdyl + duydzl
+
+ if( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
+ ! temporary variables used for fixing attenuation in a consistent way
+ duxdxl_att = xixl*tempx1l_att(i,j,k,thread_id) + etaxl*tempx2l_att(i,j,k,thread_id) + &
+ gammaxl*tempx3l_att(i,j,k,thread_id)
+ duxdyl_att = xiyl*tempx1l_att(i,j,k,thread_id) + etayl*tempx2l_att(i,j,k,thread_id) + &
+ gammayl*tempx3l_att(i,j,k,thread_id)
+ duxdzl_att = xizl*tempx1l_att(i,j,k,thread_id) + etazl*tempx2l_att(i,j,k,thread_id) + &
+ gammazl*tempx3l_att(i,j,k,thread_id)
+
+ duydxl_att = xixl*tempy1l_att(i,j,k,thread_id) + etaxl*tempy2l_att(i,j,k,thread_id) + &
+ gammaxl*tempy3l_att(i,j,k,thread_id)
+ duydyl_att = xiyl*tempy1l_att(i,j,k,thread_id) + etayl*tempy2l_att(i,j,k,thread_id) + &
+ gammayl*tempy3l_att(i,j,k,thread_id)
+ duydzl_att = xizl*tempy1l_att(i,j,k,thread_id) + etazl*tempy2l_att(i,j,k,thread_id) + &
+ gammazl*tempy3l_att(i,j,k,thread_id)
+
+ duzdxl_att = xixl*tempz1l_att(i,j,k,thread_id) + etaxl*tempz2l_att(i,j,k,thread_id) + &
+ gammaxl*tempz3l_att(i,j,k,thread_id)
+ duzdyl_att = xiyl*tempz1l_att(i,j,k,thread_id) + etayl*tempz2l_att(i,j,k,thread_id) + &
+ gammayl*tempz3l_att(i,j,k,thread_id)
+ duzdzl_att = xizl*tempz1l_att(i,j,k,thread_id) + etazl*tempz2l_att(i,j,k,thread_id) + &
+ gammazl*tempz3l_att(i,j,k,thread_id)
+
+ ! precompute some sums to save CPU time
+ duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
+ duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
+ duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
+
+ ! compute deviatoric strain
+ if( SIMULATION_TYPE == 3 ) &
+ epsilon_trace_over_3(i,j,k,ispec) = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
+ epsilondev_xx_loc(i,j,k) = duxdxl_att - epsilon_trace_over_3(i,j,k,ispec)
+ epsilondev_yy_loc(i,j,k) = duydyl_att - epsilon_trace_over_3(i,j,k,ispec)
+ epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
+ epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
+ epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
+ else
+ ! computes deviatoric strain attenuation and/or for kernel calculations
+ if (COMPUTE_AND_STORE_STRAIN) then
+ templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+ if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
+ epsilondev_xx_loc(i,j,k) = duxdxl - templ
+ epsilondev_yy_loc(i,j,k) = duydyl - templ
+ epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
+ epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
+ epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
+ endif
+ endif
+
+ kappal = kappastore(i,j,k,ispec)
+ mul = mustore(i,j,k,ispec)
+
+ ! attenuation
+ if(ATTENUATION) then
+ ! use unrelaxed parameters if attenuation
+ mul = mul * one_minus_sum_beta(i,j,k,ispec)
+ endif
+
+ ! full anisotropic case, stress calculations
+ if(ANISOTROPY) then
+ c11 = c11store(i,j,k,ispec)
+ c12 = c12store(i,j,k,ispec)
+ c13 = c13store(i,j,k,ispec)
+ c14 = c14store(i,j,k,ispec)
+ c15 = c15store(i,j,k,ispec)
+ c16 = c16store(i,j,k,ispec)
+ c22 = c22store(i,j,k,ispec)
+ c23 = c23store(i,j,k,ispec)
+ c24 = c24store(i,j,k,ispec)
+ c25 = c25store(i,j,k,ispec)
+ c26 = c26store(i,j,k,ispec)
+ c33 = c33store(i,j,k,ispec)
+ c34 = c34store(i,j,k,ispec)
+ c35 = c35store(i,j,k,ispec)
+ c36 = c36store(i,j,k,ispec)
+ c44 = c44store(i,j,k,ispec)
+ c45 = c45store(i,j,k,ispec)
+ c46 = c46store(i,j,k,ispec)
+ c55 = c55store(i,j,k,ispec)
+ c56 = c56store(i,j,k,ispec)
+ c66 = c66store(i,j,k,ispec)
+
+ sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
+ c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
+ sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
+ c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
+ sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
+ c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
+ sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
+ c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
+ sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
+ c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
+ sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
+ c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
+
+ else
+
+ ! isotropic case
+ lambdalplus2mul = kappal + FOUR_THIRDS * mul
+ lambdal = lambdalplus2mul - 2.*mul
+
+ ! compute stress sigma
+ sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
+ sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
+ sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
+
+ sigma_xy = mul*duxdyl_plus_duydxl
+ sigma_xz = mul*duzdxl_plus_duxdzl
+ sigma_yz = mul*duzdyl_plus_duydzl
+
+ endif ! ANISOTROPY
+
+ ! subtract memory variables if attenuation
+ if(ATTENUATION) then
+ ! way 1
+ ! do i_sls = 1,N_SLS
+ ! R_xx_val = R_xx(i,j,k,ispec,i_sls)
+ ! R_yy_val = R_yy(i,j,k,ispec,i_sls)
+ ! sigma_xx = sigma_xx - R_xx_val
+ ! sigma_yy = sigma_yy - R_yy_val
+ ! sigma_zz = sigma_zz + R_xx_val + R_yy_val
+ ! sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
+ ! sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
+ ! sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
+ ! enddo
+
+ ! way 2
+ ! note: this should help compilers to pipeline the code and make better use of the cache;
+ ! depending on compilers, it can further decrease the computation time by ~ 30%.
+ ! by default, N_SLS = 3, therefore we take steps of 3
+ if(imodulo_N_SLS >= 1) then
+ do i_sls = 1,imodulo_N_SLS
+ R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
+ R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
+ sigma_xx = sigma_xx - R_xx_val1
+ sigma_yy = sigma_yy - R_yy_val1
+ sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
+ sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
+ sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
+ sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
+ enddo
+ endif
+
+ if(N_SLS >= imodulo_N_SLS+1) then
+ do i_sls = imodulo_N_SLS+1,N_SLS,3
+ R_xx_val1 = R_xx(i,j,k,ispec,i_sls)
+ R_yy_val1 = R_yy(i,j,k,ispec,i_sls)
+ sigma_xx = sigma_xx - R_xx_val1
+ sigma_yy = sigma_yy - R_yy_val1
+ sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
+ sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls)
+ sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls)
+ sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls)
+
+ R_xx_val2 = R_xx(i,j,k,ispec,i_sls+1)
+ R_yy_val2 = R_yy(i,j,k,ispec,i_sls+1)
+ sigma_xx = sigma_xx - R_xx_val2
+ sigma_yy = sigma_yy - R_yy_val2
+ sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
+ sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+1)
+ sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+1)
+ sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+1)
+
+ R_xx_val3 = R_xx(i,j,k,ispec,i_sls+2)
+ R_yy_val3 = R_yy(i,j,k,ispec,i_sls+2)
+ sigma_xx = sigma_xx - R_xx_val3
+ sigma_yy = sigma_yy - R_yy_val3
+ sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
+ sigma_xy = sigma_xy - R_xy(i,j,k,ispec,i_sls+2)
+ sigma_xz = sigma_xz - R_xz(i,j,k,ispec,i_sls+2)
+ sigma_yz = sigma_yz - R_yz(i,j,k,ispec,i_sls+2)
+ enddo
+ endif
+
+
+ endif
+
+ ! form dot product with test vector, symmetric form
+ tempx1(i,j,k,thread_id) = jacobianl * (sigma_xx*xixl + sigma_xy*xiyl + sigma_xz*xizl)
+ tempy1(i,j,k,thread_id) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_yz*xizl)
+ tempz1(i,j,k,thread_id) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl)
+
+ tempx2(i,j,k,thread_id) = jacobianl * (sigma_xx*etaxl + sigma_xy*etayl + sigma_xz*etazl)
+ tempy2(i,j,k,thread_id) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_yz*etazl)
+ tempz2(i,j,k,thread_id) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl)
+
+ tempx3(i,j,k,thread_id) = jacobianl * (sigma_xx*gammaxl + sigma_xy*gammayl + sigma_xz*gammazl)
+ tempy3(i,j,k,thread_id) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_yz*gammazl)
+ tempz3(i,j,k,thread_id) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl)
+
+ enddo
+ enddo
+ enddo
+
+ ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+ ! for incompressible fluid flow, Cambridge University Press (2002),
+ ! pages 386 and 389 and Figure 8.3.1
+ ! call mxm_m1_m2_5points(hprimewgll_xxT,tempx1,tempy1,tempz1,newtempx1,newtempy1,newtempz1)
+ do j=1,m2
+ do i=1,m1
+ newtempx1(i,j,1,thread_id) = &
+ hprimewgll_xxT(i,1)*tempx1(1,j,1,thread_id) + &
+ hprimewgll_xxT(i,2)*tempx1(2,j,1,thread_id) + &
+ hprimewgll_xxT(i,3)*tempx1(3,j,1,thread_id) + &
+ hprimewgll_xxT(i,4)*tempx1(4,j,1,thread_id) + &
+ hprimewgll_xxT(i,5)*tempx1(5,j,1,thread_id)
+ newtempy1(i,j,1,thread_id) = &
+ hprimewgll_xxT(i,1)*tempy1(1,j,1,thread_id) + &
+ hprimewgll_xxT(i,2)*tempy1(2,j,1,thread_id) + &
+ hprimewgll_xxT(i,3)*tempy1(3,j,1,thread_id) + &
+ hprimewgll_xxT(i,4)*tempy1(4,j,1,thread_id) + &
+ hprimewgll_xxT(i,5)*tempy1(5,j,1,thread_id)
+ newtempz1(i,j,1,thread_id) = &
+ hprimewgll_xxT(i,1)*tempz1(1,j,1,thread_id) + &
+ hprimewgll_xxT(i,2)*tempz1(2,j,1,thread_id) + &
+ hprimewgll_xxT(i,3)*tempz1(3,j,1,thread_id) + &
+ hprimewgll_xxT(i,4)*tempz1(4,j,1,thread_id) + &
+ hprimewgll_xxT(i,5)*tempz1(5,j,1,thread_id)
+ enddo
+ enddo
+
+ ! call mxm_m1_m1_5points(tempx2(1,1,k),tempy2(1,1,k),tempz2(1,1,k), &
+ ! hprimewgll_xx,newtempx2(1,1,k),newtempy2(1,1,k),newtempz2(1,1,k))
+ do i=1,m1
+ do j=1,m1
+ ! for efficiency it is better to leave this loop on k inside, it leads to slightly faster code
+ do k = 1,NGLLX
+ newtempx2(i,j,k,thread_id) = tempx2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
+ tempx2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
+ tempx2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
+ tempx2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
+ tempx2(i,5,k,thread_id)*hprimewgll_xx(5,j)
+ newtempy2(i,j,k,thread_id) = tempy2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
+ tempy2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
+ tempy2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
+ tempy2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
+ tempy2(i,5,k,thread_id)*hprimewgll_xx(5,j)
+ newtempz2(i,j,k,thread_id) = tempz2(i,1,k,thread_id)*hprimewgll_xx(1,j) + &
+ tempz2(i,2,k,thread_id)*hprimewgll_xx(2,j) + &
+ tempz2(i,3,k,thread_id)*hprimewgll_xx(3,j) + &
+ tempz2(i,4,k,thread_id)*hprimewgll_xx(4,j) + &
+ tempz2(i,5,k,thread_id)*hprimewgll_xx(5,j)
+ enddo
+ enddo
+ enddo
+
+ ! call mxm_m2_m1_5points(tempx3,tempy3,tempz3,hprimewgll_xx,newtempx3,newtempy3,newtempz3)
+ do j=1,m1
+ do i=1,m2
+ newtempx3(i,1,j,thread_id) = &
+ tempx3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
+ tempx3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
+ tempx3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
+ tempx3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
+ tempx3(i,1,5,thread_id)*hprimewgll_xx(5,j)
+ newtempy3(i,1,j,thread_id) = &
+ tempy3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
+ tempy3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
+ tempy3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
+ tempy3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
+ tempy3(i,1,5,thread_id)*hprimewgll_xx(5,j)
+ newtempz3(i,1,j,thread_id) = &
+ tempz3(i,1,1,thread_id)*hprimewgll_xx(1,j) + &
+ tempz3(i,1,2,thread_id)*hprimewgll_xx(2,j) + &
+ tempz3(i,1,3,thread_id)*hprimewgll_xx(3,j) + &
+ tempz3(i,1,4,thread_id)*hprimewgll_xx(4,j) + &
+ tempz3(i,1,5,thread_id)*hprimewgll_xx(5,j)
+ enddo
+ enddo
+
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+
+ fac1 = wgllwgll_yz(j,k)
+ fac2 = wgllwgll_xz(i,k)
+ fac3 = wgllwgll_xy(i,j)
+
+ ! sum contributions from each element to the global mesh using indirect addressing
+ iglob = ibool(i,j,k,ispec)
+
+ ! accel_omp(1,iglob,thread_id) = accel_omp(1,iglob,thread_id)&
+ ! - fac1*newtempx1(i,j,k,thread_id) - fac2*newtempx2(i,j,k,thread_id)&
+ ! - fac3*newtempx3(i,j,k,thread_id)
+ ! accel_omp(2,iglob,thread_id) = accel_omp(2,iglob,thread_id)&
+ ! - fac1*newtempy1(i,j,k,thread_id) - fac2*newtempy2(i,j,k,thread_id)&
+ ! - fac3*newtempy3(i,j,k,thread_id)
+ ! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id)&
+ ! - fac1*newtempz1(i,j,k,thread_id) - fac2*newtempz2(i,j,k,thread_id)&
+ ! - fac3*newtempz3(i,j,k,thread_id)
+
+ ! Assembly of shared degrees of freedom fixed through mesh coloring
+ !! !$OMP ATOMIC
+ accel(1,iglob) = accel(1,iglob) &
+ - (fac1*newtempx1(i,j,k,thread_id) &
+ + fac2*newtempx2(i,j,k,thread_id) &
+ + fac3*newtempx3(i,j,k,thread_id))
+ !! !$OMP ATOMIC
+ accel(2,iglob) = accel(2,iglob) &
+ - (fac1*newtempy1(i,j,k,thread_id) &
+ + fac2*newtempy2(i,j,k,thread_id) &
+ + fac3*newtempy3(i,j,k,thread_id))
+ !! !$OMP ATOMIC
+ accel(3,iglob) = accel(3,iglob) &
+ - (fac1*newtempz1(i,j,k,thread_id) &
+ + fac2*newtempz2(i,j,k,thread_id) &
+ + fac3*newtempz3(i,j,k,thread_id))
+
+ ! accel(1,iglob) = accel(1,iglob) - &
+ ! (fac1*newtempx1(i,j,k,thread_id) + fac2*newtempx2(i,j,k,thread_id) + fac3*newtempx3(i,j,k,thread_id))
+ ! accel(2,iglob) = accel(2,iglob) - &
+ ! (fac1*newtempy1(i,j,k,thread_id) + fac2*newtempy2(i,j,k,thread_id) + fac3*newtempy3(i,j,k,thread_id))
+ ! accel(3,iglob) = accel(3,iglob) - &
+ ! (fac1*newtempz1(i,j,k,thread_id) + fac2*newtempz2(i,j,k,thread_id) + fac3*newtempz3(i,j,k,thread_id))
+
+ ! accel_omp(1,iglob,thread_id) = accel_omp(1,iglob,thread_id) - fac1*newtempx1(i,j,k,thread_id) - &
+ ! fac2*newtempx2(i,j,k,thread_id) - fac3*newtempx3(i,j,k,thread_id)
+ ! accel_omp(2,iglob,thread_id) = accel_omp(2,iglob,thread_id) - fac1*newtempy1(i,j,k,thread_id) - &
+ ! fac2*newtempy2(i,j,k,thread_id) - fac3*newtempy3(i,j,k,thread_id)
+ ! accel_omp(3,iglob,thread_id) = accel_omp(3,iglob,thread_id) - fac1*newtempz1(i,j,k,thread_id) - &
+ ! fac2*newtempz2(i,j,k,thread_id) - fac3*newtempz3(i,j,k,thread_id)
+
+ ! update memory variables based upon the Runge-Kutta scheme
+ if(ATTENUATION) then
+
+ ! use Runge-Kutta scheme to march in time
+ do i_sls = 1,N_SLS
+
+ factor_loc = mustore(i,j,k,ispec) * factor_common(i_sls,i,j,k,ispec)
+
+ alphaval_loc = alphaval(i_sls)
+ betaval_loc = betaval(i_sls)
+ gammaval_loc = gammaval(i_sls)
+
+ ! term in xx
+ Sn = factor_loc * epsilondev_xx(i,j,k,ispec)
+ Snp1 = factor_loc * epsilondev_xx_loc(i,j,k)
+ R_xx(i,j,k,ispec,i_sls) = alphaval_loc * R_xx(i,j,k,ispec,i_sls) + &
+ betaval_loc * Sn + gammaval_loc * Snp1
+ ! term in yy
+ Sn = factor_loc * epsilondev_yy(i,j,k,ispec)
+ Snp1 = factor_loc * epsilondev_yy_loc(i,j,k)
+ R_yy(i,j,k,ispec,i_sls) = alphaval_loc * R_yy(i,j,k,ispec,i_sls) + &
+ betaval_loc * Sn + gammaval_loc * Snp1
+ ! term in zz not computed since zero trace
+ ! term in xy
+ Sn = factor_loc * epsilondev_xy(i,j,k,ispec)
+ Snp1 = factor_loc * epsilondev_xy_loc(i,j,k)
+ R_xy(i,j,k,ispec,i_sls) = alphaval_loc * R_xy(i,j,k,ispec,i_sls) + &
+ betaval_loc * Sn + gammaval_loc * Snp1
+ ! term in xz
+ Sn = factor_loc * epsilondev_xz(i,j,k,ispec)
+ Snp1 = factor_loc * epsilondev_xz_loc(i,j,k)
+ R_xz(i,j,k,ispec,i_sls) = alphaval_loc * R_xz(i,j,k,ispec,i_sls) + &
+ betaval_loc * Sn + gammaval_loc * Snp1
+ ! term in yz
+ Sn = factor_loc * epsilondev_yz(i,j,k,ispec)
+ Snp1 = factor_loc * epsilondev_yz_loc(i,j,k)
+ R_yz(i,j,k,ispec,i_sls) = alphaval_loc * R_yz(i,j,k,ispec,i_sls) + &
+ betaval_loc * Sn + gammaval_loc * Snp1
+
+ enddo ! end of loop on memory variables
+
+ endif ! end attenuation
+
+ enddo
+ enddo
+ enddo
+
+ ! save deviatoric strain for Runge-Kutta scheme
+ if ( COMPUTE_AND_STORE_STRAIN ) then
+ epsilondev_xx(:,:,:,ispec) = epsilondev_xx_loc(:,:,:)
+ epsilondev_yy(:,:,:,ispec) = epsilondev_yy_loc(:,:,:)
+ epsilondev_xy(:,:,:,ispec) = epsilondev_xy_loc(:,:,:)
+ epsilondev_xz(:,:,:,ispec) = epsilondev_xz_loc(:,:,:)
+ epsilondev_yz(:,:,:,ispec) = epsilondev_yz_loc(:,:,:)
+ endif
+
+ enddo ! spectral element loop
+ !$OMP END DO
+ !$OMP END PARALLEL
+
+ ! The elements are in order of color. First we do color 1 elements,
+ ! then color 2, etc. The ispec has to moved to start at the next
+ ! color.
+ estart = estart + num_elements
+
+ enddo ! loop over colors
+
+
+ ! "stop" timer
+ ! end_time = omp_get_wtime()
+
+ ! write(*,*) "Total Elapsed time: ", (end_time-start_time) , "seconds. (Threads=",NUM_THREADS,")"
+ ! write(*,*) "Accumulate Elapsed time: ", (accumulate_time_stop-accumulate_time_start) , "seconds"
+
+
+ ! These are now allocated at the beginning and never deallocated
+ ! because the program just finishes at the end.
+
+ ! deallocate(dummyx_loc)
+ ! deallocate(dummyy_loc)
+ ! deallocate(dummyz_loc)
+ ! deallocate(dummyx_loc_att)
+ ! deallocate(dummyy_loc_att)
+ ! deallocate(dummyz_loc_att)
+ ! deallocate(newtempx1)
+ ! deallocate(newtempx2)
+ ! deallocate(newtempx3)
+ ! deallocate(newtempy1)
+ ! deallocate(newtempy2)
+ ! deallocate(newtempy3)
+ ! deallocate(newtempz1)
+ ! deallocate(newtempz2)
+ ! deallocate(newtempz3)
+ ! deallocate(tempx1)
+ ! deallocate(tempx2)
+ ! deallocate(tempx3)
+ ! deallocate(tempy1)
+ ! deallocate(tempy2)
+ ! deallocate(tempy3)
+ ! deallocate(tempz1)
+ ! deallocate(tempz2)
+ ! deallocate(tempz3)
+ ! deallocate(tempx1_att)
+ ! deallocate(tempx2_att)
+ ! deallocate(tempx3_att)
+ ! deallocate(tempy1_att)
+ ! deallocate(tempy2_att)
+ ! deallocate(tempy3_att)
+ ! deallocate(tempz1_att)
+ ! deallocate(tempz2_att)
+ ! deallocate(tempz3_att)
+
+ ! accel(:,:) = accel_omp(:,:,1)
+
+ end subroutine compute_forces_viscoelastic_Dev_openmp
+
+
More information about the CIG-COMMITS
mailing list