[cig-commits] r21037 - seismo/3D/SPECFEM3D/trunk/src/specfem3D

joseph.charles at geodynamics.org joseph.charles at geodynamics.org
Thu Nov 15 06:59:03 PST 2012


Author: joseph.charles
Date: 2012-11-15 06:59:03 -0800 (Thu, 15 Nov 2012)
New Revision: 21037

Modified:
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90
   seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90
Log:
fixes attenuation bug in OpenMP code by using 1st order Taylor expansion for epsilondev_loc variable


Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev_openmp.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev_openmp.f90	2012-11-15 13:41:58 UTC (rev 21036)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/compute_forces_elastic_Dev_openmp.f90	2012-11-15 14:59:03 UTC (rev 21037)
@@ -27,13 +27,13 @@
 ! OpenMP Threaded variant by John Levesque, Max Rietmann and Olaf Schenk
 
   subroutine compute_forces_elastic_Dev_openmp(iphase ,NSPEC_AB,NGLOB_AB, &
-                             displ,accel, &
+                             displ,veloc,accel, &
                              xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
                              hprime_xx,hprime_xxT, &
                              hprimewgll_xx,hprimewgll_xxT, &
                              wgllwgll_xy,wgllwgll_xz,wgllwgll_yz, &
                              kappastore,mustore,jacobian,ibool, &
-                             ATTENUATION, &
+                             ATTENUATION,deltat, &
                              one_minus_sum_beta,factor_common,alphaval,betaval,gammaval,&
                              NSPEC_ATTENUATION_AB, &
                              R_xx,R_yy,R_xy,R_xz,R_yz, &
@@ -64,9 +64,11 @@
   ! Trying to pass these variables as subroutine arguments ran into
   ! problems, so we reference them from their module, making them
   ! accessible from this subroutine
-  use specfem_par_elastic, only:dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3,&
-       newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3,&
-       tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3,num_elem_colors_elastic
+  use specfem_par_elastic, only:dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3, &
+       newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3, &
+       tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3,num_elem_colors_elastic, &
+       dummyx_loc_att,dummyy_loc_att,dummyz_loc_att,tempx1_att,tempx2_att,tempx3_att, &
+       tempy1_att,tempy2_att,tempy3_att,tempz1_att,tempz2_att,tempz3_att
 
   implicit none
 
@@ -149,6 +151,10 @@
   real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
   real(kind=CUSTOM_REAL) kappal
 
+  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
+  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att;
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att;
+
   integer OMP_get_thread_num
   integer OMP_GET_MAX_THREADS
 
@@ -246,7 +252,7 @@
           endif
        endif ! adjoint
 
-       ! stores displacment values in local array
+       ! stores displacement values in local array
        do k=1,NGLLZ
           do j=1,NGLLY
              do i=1,NGLLX
@@ -258,6 +264,18 @@
           enddo
        enddo
 
+       ! stores velocity values in local array
+       do k=1,NGLLZ
+          do j=1,NGLLY
+             do i=1,NGLLX
+                iglob = ibool(i,j,k,ispec)
+                dummyx_loc_att(i,j,k,thread_id) = veloc(1,iglob)
+                dummyy_loc_att(i,j,k,thread_id) = veloc(2,iglob)
+                dummyz_loc_att(i,j,k,thread_id) = veloc(3,iglob)
+             enddo
+          enddo
+       enddo
+
        ! subroutines adapted from Deville, Fischer and Mund, High-order methods
        ! for incompressible fluid flow, Cambridge University Press (2002),
        ! pages 386 and 389 and Figure 8.3.1
@@ -335,6 +353,113 @@
           enddo
        enddo
 
+       if( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
+          do j=1,m2
+             do i=1,m1
+                tempx1_att(i,j,1,thread_id) = tempx1(i,j,1,thread_id)
+                tempy1_att(i,j,1,thread_id) = tempy1(i,j,1,thread_id)
+                tempz1_att(i,j,1,thread_id) = tempz1(i,j,1,thread_id)
+             enddo
+          enddo
+
+          do j=1,m1
+             do i=1,m1
+                do k=1,NGLLX
+                   tempx2_att(i,j,k,thread_id) = tempx2(i,j,k,thread_id)
+                   tempy2_att(i,j,k,thread_id) = tempy2(i,j,k,thread_id)
+                   tempz2_att(i,j,k,thread_id) = tempz2(i,j,k,thread_id)
+                enddo
+             enddo
+          enddo
+
+          do j=1,m1
+             do i=1,m2
+                tempx3_att(i,1,j,thread_id) = tempx3(i,1,j,thread_id)
+                tempy3_att(i,1,j,thread_id) = tempy3(i,1,j,thread_id)
+                tempz3_att(i,1,j,thread_id) = tempz3(i,1,j,thread_id)
+             enddo
+          enddo
+
+          ! use first order Taylor expansion of displacement for local storage of stresses 
+          ! at this current time step, to fix attenuation in a consistent way
+          do j=1,m2
+             do i=1,m1
+                tempx1l_att(i,j,1,thread_id) = tempx1l_att(i,j,1,thread_id) + &
+                     deltat*hprime_xx(i,1)*dummyx_loc_att(1,j,1,thread_id) + &
+                     deltat*hprime_xx(i,2)*dummyx_loc_att(2,j,1,thread_id) + &
+                     deltat*hprime_xx(i,3)*dummyx_loc_att(3,j,1,thread_id) + &
+                     deltat*hprime_xx(i,4)*dummyx_loc_att(4,j,1,thread_id) + &
+                     deltat*hprime_xx(i,5)*dummyx_loc_att(5,j,1,thread_id)
+
+                tempy1l_att(i,j,1,thread_id) = tempy1l_att(i,j,1,thread_id) + &
+                     deltat*hprime_xx(i,1)*dummyy_loc_att(1,j,1,thread_id) + &
+                     deltat*hprime_xx(i,2)*dummyy_loc_att(2,j,1,thread_id) + &
+                     deltat*hprime_xx(i,3)*dummyy_loc_att(3,j,1,thread_id) + &
+                     deltat*hprime_xx(i,4)*dummyy_loc_att(4,j,1,thread_id) + &
+                     deltat*hprime_xx(i,5)*dummyy_loc_att(5,j,1,thread_id)
+
+                tempz1l_att(i,j,1,thread_id) = tempz1l_att(i,j,1,thread_id) + &
+                     deltat*hprime_xx(i,1)*dummyz_loc_att(1,j,1,thread_id) + &
+                     deltat*hprime_xx(i,2)*dummyz_loc_att(2,j,1,thread_id) + &
+                     deltat*hprime_xx(i,3)*dummyz_loc_att(3,j,1,thread_id) + &
+                     deltat*hprime_xx(i,4)*dummyz_loc_att(4,j,1,thread_id) + &
+                     deltat*hprime_xx(i,5)*dummyz_loc_att(5,j,1,thread_id)
+             enddo
+          enddo
+
+          do j=1,m1
+             do i=1,m1
+                do k=1,NGLLX
+                   tempx2l_att(i,j,k,thread_id) = tempx2l_att(i,j,k,thread_id) + &
+                        deltat*dummyx_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
+                        deltat*dummyx_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
+                        deltat*dummyx_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
+                        deltat*dummyx_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
+                        deltat*dummyx_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
+
+                   tempy2l_att(i,j,k,thread_id) = tempy2l_att(i,j,k,thread_id) + &
+                        deltat*dummyy_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
+                        deltat*dummyy_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
+                        deltat*dummyy_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
+                        deltat*dummyy_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
+                        deltat*dummyy_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
+
+                   tempz2l_att(i,j,k,thread_id) = tempz2l_att(i,j,k,thread_id) + &
+                        deltat*dummyz_loc_att(i,1,k,thread_id)*hprime_xxT(1,j) + &
+                        deltat*dummyz_loc_att(i,2,k,thread_id)*hprime_xxT(2,j) + &
+                        deltat*dummyz_loc_att(i,3,k,thread_id)*hprime_xxT(3,j) + &
+                        deltat*dummyz_loc_att(i,4,k,thread_id)*hprime_xxT(4,j) + &
+                        deltat*dummyz_loc_att(i,5,k,thread_id)*hprime_xxT(5,j)
+                enddo
+             enddo
+          enddo
+
+          do j=1,m1
+             do i=1,m2
+                tempx3l_att(i,1,j,thread_id) = tempx3l_att(i,1,j,thread_id) + &
+                     deltat*dummyx_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
+                     deltat*dummyx_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
+                     deltat*dummyx_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
+                     deltat*dummyx_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
+                     deltat*dummyx_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
+
+                tempy3l_att(i,1,j,thread_id) = tempy3l_att(i,1,j,thread_id) + &
+                     deltat*dummyy_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
+                     deltat*dummyy_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
+                     deltat*dummyy_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
+                     deltat*dummyy_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
+                     deltat*dummyy_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
+
+                tempz3l_att(i,1,j,thread_id) = tempz3l_att(i,1,j,thread_id) + &
+                     deltat*dummyz_loc_att(i,1,1,thread_id)*hprime_xxT(1,j) + &
+                     deltat*dummyz_loc_att(i,1,2,thread_id)*hprime_xxT(2,j) + &
+                     deltat*dummyz_loc_att(i,1,3,thread_id)*hprime_xxT(3,j) + &
+                     deltat*dummyz_loc_att(i,1,4,thread_id)*hprime_xxT(4,j) + &
+                     deltat*dummyz_loc_att(i,1,5,thread_id)*hprime_xxT(5,j)
+             enddo
+          endif
+       endif
+
        do k=1,NGLLZ
           do j=1,NGLLY
              do i=1,NGLLX
@@ -394,16 +519,54 @@
                 duxdyl_plus_duydxl = duxdyl + duydxl
                 duzdxl_plus_duxdzl = duzdxl + duxdzl
                 duzdyl_plus_duydzl = duzdyl + duydzl
+                
+                if( ATTENUATION .and. COMPUTE_AND_STORE_STRAIN ) then
+                   ! temporary variables used for fixing attenuation in a consistent way
+                   duxdxl_att = xixl*tempx1l_att(i,j,k,thread_id) + etaxl*tempx2l_att(i,j,k,thread_id) + &
+                        gammaxl*tempx3l_att(i,j,k,thread_id)
+                   duxdyl_att = xiyl*tempx1l_att(i,j,k,thread_id) + etayl*tempx2l_att(i,j,k,thread_id) + &
+                        gammayl*tempx3l_att(i,j,k,thread_id)
+                   duxdzl_att = xizl*tempx1l_att(i,j,k,thread_id) + etazl*tempx2l_att(i,j,k,thread_id) + &
+                        gammazl*tempx3l_att(i,j,k,thread_id)
 
-                ! computes deviatoric strain attenuation and/or for kernel calculations
-                if (COMPUTE_AND_STORE_STRAIN) then
-                   templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
-                   if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
-                   epsilondev_xx_loc(i,j,k) = duxdxl - templ
-                   epsilondev_yy_loc(i,j,k) = duydyl - templ
-                   epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
-                   epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
-                   epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
+                   duydxl_att = xixl*tempy1l_att(i,j,k,thread_id) + etaxl*tempy2l_att(i,j,k,thread_id) + &
+                        gammaxl*tempy3l_att(i,j,k,thread_id)
+                   duydyl_att = xiyl*tempy1l_att(i,j,k,thread_id) + etayl*tempy2l_att(i,j,k,thread_id) + &
+                        gammayl*tempy3l_att(i,j,k,thread_id)
+                   duydzl_att = xizl*tempy1l_att(i,j,k,thread_id) + etazl*tempy2l_att(i,j,k,thread_id) + &
+                        gammazl*tempy3l_att(i,j,k,thread_id)
+
+                   duzdxl_att = xixl*tempz1l_att(i,j,k,thread_id) + etaxl*tempz2l_att(i,j,k,thread_id) + &
+                        gammaxl*tempz3l_att(i,j,k,thread_id)
+                   duzdyl_att = xiyl*tempz1l_att(i,j,k,thread_id) + etayl*tempz2l_att(i,j,k,thread_id) + &
+                        gammayl*tempz3l_att(i,j,k,thread_id)
+                   duzdzl_att = xizl*tempz1l_att(i,j,k,thread_id) + etazl*tempz2l_att(i,j,k,thread_id) + &
+                        gammazl*tempz3l_att(i,j,k,thread_id)
+
+                   ! precompute some sums to save CPU time
+                   duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
+                   duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
+                   duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
+
+                   ! compute deviatoric strain
+                   if( SIMULATION_TYPE == 3 ) &
+                        epsilon_trace_over_3(i,j,k,ispec) = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
+                   epsilondev_xx_loc(i,j,k) = duxdxl_att - epsilon_trace_over_3(i,j,k,ispec)
+                   epsilondev_yy_loc(i,j,k) = duydyl_att - epsilon_trace_over_3(i,j,k,ispec)
+                   epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl_att
+                   epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
+                   epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl_att
+                else
+                   ! computes deviatoric strain attenuation and/or for kernel calculations
+                   if (COMPUTE_AND_STORE_STRAIN) then
+                      templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+                      if( SIMULATION_TYPE == 3 ) epsilon_trace_over_3(i,j,k,ispec) = templ
+                      epsilondev_xx_loc(i,j,k) = duxdxl - templ
+                      epsilondev_yy_loc(i,j,k) = duydyl - templ
+                      epsilondev_xy_loc(i,j,k) = 0.5 * duxdyl_plus_duydxl
+                      epsilondev_xz_loc(i,j,k) = 0.5 * duzdxl_plus_duxdzl
+                      epsilondev_yz_loc(i,j,k) = 0.5 * duzdyl_plus_duydzl
+                   endif
                 endif
 
                 kappal = kappastore(i,j,k,ispec)
@@ -760,6 +923,9 @@
   ! deallocate(dummyx_loc)
   ! deallocate(dummyy_loc)
   ! deallocate(dummyz_loc)
+  ! deallocate(dummyx_loc_att)
+  ! deallocate(dummyy_loc_att)
+  ! deallocate(dummyz_loc_att)
   ! deallocate(newtempx1)
   ! deallocate(newtempx2)
   ! deallocate(newtempx3)
@@ -778,6 +944,15 @@
   ! deallocate(tempz1)
   ! deallocate(tempz2)
   ! deallocate(tempz3)
+  ! deallocate(tempx1_att)
+  ! deallocate(tempx2_att)
+  ! deallocate(tempx3_att)
+  ! deallocate(tempy1_att)
+  ! deallocate(tempy2_att)
+  ! deallocate(tempy3_att)
+  ! deallocate(tempz1_att)
+  ! deallocate(tempz2_att)
+  ! deallocate(tempz3_att)
 
   ! accel(:,:) = accel_omp(:,:,1)
 

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90	2012-11-15 13:41:58 UTC (rev 21036)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/prepare_timerun.F90	2012-11-15 14:59:03 UTC (rev 21037)
@@ -1279,6 +1279,9 @@
     allocate(dummyx_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
     allocate(dummyy_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
     allocate(dummyz_loc(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(dummyx_loc_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(dummyy_loc_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(dummyz_loc_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
     allocate(newtempx1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
     allocate(newtempx2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
     allocate(newtempx3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
@@ -1297,6 +1300,15 @@
     allocate(tempz1(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
     allocate(tempz2(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
     allocate(tempz3(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempx1_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempx2_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempx3_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempy1_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempy2_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempy3_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempz1_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempz2_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
+    allocate(tempz3_att(NGLLX,NGLLY,NGLLZ,NUM_THREADS))
 
     ! set num_elem_colors array in case no mesh coloring is used
     if( .not. USE_MESH_COLORING_GPU ) then

Modified: seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90
===================================================================
--- seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90	2012-11-15 13:41:58 UTC (rev 21036)
+++ seismo/3D/SPECFEM3D/trunk/src/specfem3D/specfem3D_par.f90	2012-11-15 14:59:03 UTC (rev 21037)
@@ -302,6 +302,12 @@
        dummyx_loc,dummyy_loc,dummyz_loc,newtempx1,newtempx2,newtempx3,&
        newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3,&
        tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+  
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:),allocatable :: &
+       dummyx_loc_att,dummyy_loc_att,dummyz_loc_att, &
+       tempx1_att,tempx2_att,tempx3_att, &
+       tempy1_att,tempy2_att,tempy3_att, &
+       tempz1_att,tempz2_att,tempz3_att
 
 ! mass matrix
   real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass



More information about the CIG-COMMITS mailing list