[cig-commits] [commit] QA: speeds up mesh resolution check; updates comments and replaces dot_product() call with explicit calculation (20f1434)

cig_noreply at geodynamics.org cig_noreply at geodynamics.org
Wed Feb 5 06:53:01 PST 2014


Repository : ssh://geoshell/specfem3d

On branch  : QA
Link       : https://github.com/geodynamics/specfem3d/compare/e38ba401e045bc221ec1099e88d3aa31e7b4773c...2dbb673169cd90b856b600c84a8119509592b7d0

>---------------------------------------------------------------

commit 20f14341c701f42321ca035e70f2292dda3d8e79
Author: daniel peter <peterda at ethz.ch>
Date:   Tue Feb 4 15:01:50 2014 +0100

    speeds up mesh resolution check; updates comments and replaces dot_product() call with explicit calculation


>---------------------------------------------------------------

20f14341c701f42321ca035e70f2292dda3d8e79
 src/shared/check_mesh_resolution.f90 | 300 +++++++++++++++++++++++++----------
 src/specfem3D/compute_kernels.f90    |  31 ++--
 src/specfem3D/prepare_timerun.F90    |   5 +-
 3 files changed, 233 insertions(+), 103 deletions(-)

diff --git a/src/shared/check_mesh_resolution.f90 b/src/shared/check_mesh_resolution.f90
index 795abe0..e7d04f4 100644
--- a/src/shared/check_mesh_resolution.f90
+++ b/src/shared/check_mesh_resolution.f90
@@ -217,10 +217,8 @@
     !pmax_glob = max(pmax_glob,pmax)
 
     ! computes minimum and maximum distance of neighbor GLL points in this grid cell
-    ! exact way (expensive)
     call get_GLL_minmaxdistance(distance_min,distance_max,ispec, &
                                 NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore)
-    ! approximate way (based on theoretical GLL point spacing)
 
     distance_min_glob = min(distance_min_glob, distance_min)
     distance_max_glob = max(distance_max_glob, distance_max)
@@ -240,7 +238,6 @@
     dt_suggested = COURANT_SUGGESTED * distance_min / max( vpmax,vsmax )
     dt_suggested_glob = min( dt_suggested_glob, dt_suggested)
 
-
     ! debug: for vtk output
     if( SAVE_MESH_FILES ) tmp2(ispec) = pmax
 
@@ -504,6 +501,13 @@
   integer:: ier
   character(len=256) :: filename,prname
 
+  ! timing
+  double precision, external :: wtime
+  double precision :: time_start,tCPU
+
+  ! timing gets MPI starting time
+  time_start = wtime()
+
   ! initializations
   if( DT <= 0.0d0) then
     DT_PRESENT = .false.
@@ -561,13 +565,6 @@
     vsmin_glob = min ( vsmin_glob, vsmin)
     vsmax_glob = max ( vsmax_glob, vsmax)
 
-    ! computes minimum and maximum distance of neighbor GLL points in this grid cell
-    call get_GLL_minmaxdistance(distance_min,distance_max,ispec, &
-                          NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore)
-
-    distance_min_glob = min( distance_min_glob, distance_min)
-    distance_max_glob = max( distance_max_glob, distance_max)
-
     ! computes minimum and maximum size of this grid cell
     call get_elem_minmaxsize(elemsize_min,elemsize_max,ispec, &
                           NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore)
@@ -575,22 +572,6 @@
     elemsize_min_glob = min( elemsize_min_glob, elemsize_min)
     elemsize_max_glob = max( elemsize_max_glob, elemsize_max)
 
-    ! courant number
-    ! based on minimum GLL point distance and maximum velocity
-    ! i.e. on the maximum ratio of ( velocity / gridsize )
-    if( DT_PRESENT ) then
-      cmax = max( vpmax,vp2max,vsmax ) * DT / distance_min
-      cmax_glob = max(cmax_glob,cmax)
-
-      ! debug: for vtk output
-      if( SAVE_MESH_FILES ) tmp1(ispec) = cmax
-
-    endif
-
-    ! suggested timestep
-    dt_suggested = COURANT_SUGGESTED * distance_min / max( vpmax,vp2max,vsmax )
-    dt_suggested_glob = min( dt_suggested_glob, dt_suggested)
-
     ! estimation of minimum period resolved
     ! based on average GLL distance within element and minimum velocity
     !
@@ -619,6 +600,29 @@
     !pmax = distance_max / min( vpmin,vsmin ) * NELEM_PER_WAVELENGTH
     !pmax_glob = max(pmax_glob,pmax)
 
+    ! computes minimum and maximum distance of neighbor GLL points in this grid cell
+    call get_GLL_minmaxdistance(distance_min,distance_max,ispec, &
+                          NSPEC_AB,NGLOB_AB,ibool,xstore,ystore,zstore)
+
+    distance_min_glob = min( distance_min_glob, distance_min)
+    distance_max_glob = max( distance_max_glob, distance_max)
+
+    ! courant number
+    ! based on minimum GLL point distance and maximum velocity
+    ! i.e. on the maximum ratio of ( velocity / gridsize )
+    if( DT_PRESENT ) then
+      cmax = max( vpmax,vp2max,vsmax ) * DT / distance_min
+      cmax_glob = max(cmax_glob,cmax)
+
+      ! debug: for vtk output
+      if( SAVE_MESH_FILES ) tmp1(ispec) = cmax
+
+    endif
+
+    ! suggested timestep
+    dt_suggested = COURANT_SUGGESTED * distance_min / max( vpmax,vp2max,vsmax )
+    dt_suggested_glob = min( dt_suggested_glob, dt_suggested)
+
     ! debug: for vtk output
     if( SAVE_MESH_FILES ) tmp2(ispec) = pmax
 
@@ -782,6 +786,14 @@
   call bcast_all_cr(tmp_val,1)
   min_resolved_period = tmp_val(1)
 
+  ! timing
+  tCPU = wtime() - time_start
+  if( myrank == 0 ) then
+    write(IMAIN,*) "Elapsed time for checking mesh resolution in seconds = ", tCPU
+    ! flushes file buffer for main output file (IMAIN)
+    call flush_IMAIN()
+  endif
+
   ! debug: for vtk output
   if( SAVE_MESH_FILES ) then
     call create_name_database(prname,myrank,LOCAL_PATH)
@@ -808,7 +820,95 @@
 
 
   subroutine get_vpvs_minmax(vpmin,vpmax,vsmin,vsmax,ispec,has_vs_zero, &
-                            NSPEC_AB,kappastore,mustore,rho_vp,rho_vs)
+                             NSPEC_AB,kappastore,mustore,rho_vp,rho_vs)
+
+! calculates the min/max size of the specified element (ispec) for acoustic / elastic domains
+
+  implicit none
+
+  include "constants.h"
+
+  real(kind=CUSTOM_REAL) :: vpmin,vpmax,vsmin,vsmax
+
+  integer :: ispec
+  logical :: has_vs_zero
+
+  integer :: NSPEC_AB
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_AB) :: &
+    kappastore,mustore,rho_vp,rho_vs
+
+  ! local parameters
+  real(kind=CUSTOM_REAL) :: vp,vs
+  integer :: i,j,k
+  integer :: incrx,incry,incrz
+
+  ! element looping
+  ! note: the mesh resolution check is only approximative, it is thus sufficient to check only a few element points
+  logical,parameter :: MIDPOINT_CHECK_ONLY = .true.
+
+  ! initializes
+  vpmin = HUGEVAL
+  vpmax = -HUGEVAL
+  vsmin = HUGEVAL
+  vsmax = -HUGEVAL
+
+  ! looping increments
+  if( MIDPOINT_CHECK_ONLY ) then
+    ! only corners and midpoints
+    incrx = NGLLX/2
+    incry = NGLLY/2
+    incrz = NGLLZ/2
+  else
+    ! all gll points
+    incrx = 1
+    incry = 1
+    incrz = 1
+  endif
+
+  ! loops over gll points
+  ! (avoids temporary 3d arrays)
+  do k=1,NGLLZ,incrz
+    do j=1,NGLLY,incry
+      do i=1,NGLLX,incrx
+
+        ! vp
+        if( rho_vp(i,j,k,ispec) > TINYVAL ) then
+          vp = (FOUR_THIRDS * mustore(i,j,k,ispec) + kappastore(i,j,k,ispec)) / rho_vp(i,j,k,ispec)
+        else
+          vp = 0.0_CUSTOM_REAL
+        endif
+        ! min/max
+        if( vp < vpmin ) vpmin = vp
+        if( vp > vpmax ) vpmax = vp
+
+        ! vs
+        if( rho_vs(i,j,k,ispec) > TINYVAL ) then
+          vs = mustore(i,j,k,ispec) / rho_vs(i,j,k,ispec)
+        else
+          vs = 0.0_CUSTOM_REAL
+        endif
+        ! ignore fluid regions with Vs = 0
+        if( vs > TINYVAL ) then
+          if( vs < vsmin ) vsmin = vs
+        else
+          has_vs_zero = .true.
+        endif
+        if( vs > vsmax ) vsmax = vs
+
+      enddo
+    enddo
+  enddo
+
+  end subroutine get_vpvs_minmax
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine get_vpvs_minmax_alt(vpmin,vpmax,vsmin,vsmax,ispec,has_vs_zero, &
+                                 NSPEC_AB,kappastore,mustore,rho_vp,rho_vs)
 
 ! calculates the min/max size of the specified element (ispec) for acoustic / elastic domains
 
@@ -840,7 +940,7 @@
     vp_elem(:,:,:) = (FOUR_THIRDS * mustore(:,:,:,ispec) &
                     + kappastore(:,:,:,ispec)) / rho_vp(:,:,:,ispec)
   elsewhere
-    vp_elem(:,:,:) = 0.0
+    vp_elem(:,:,:) = 0.0_CUSTOM_REAL
   endwhere
 
   val_min = minval(vp_elem(:,:,:))
@@ -853,7 +953,7 @@
   where( rho_vs(:,:,:,ispec) > TINYVAL )
     vs_elem(:,:,:) = mustore(:,:,:,ispec) / rho_vs(:,:,:,ispec)
   elsewhere
-    vs_elem(:,:,:) = 0.0
+    vs_elem(:,:,:) = 0.0_CUSTOM_REAL
   endwhere
 
   val_min = minval(vs_elem(:,:,:))
@@ -867,7 +967,7 @@
   endif
   vsmax = max(vsmax,val_max(1))
 
-  end subroutine get_vpvs_minmax
+  end subroutine get_vpvs_minmax_alt
 
 !
 !-------------------------------------------------------------------------------------------------
@@ -989,53 +1089,57 @@
 
   ! local parameters
   real(kind=CUSTOM_REAL) :: dist
-  integer :: i,j,k,iglob_a,iglob_b
+  real(kind=CUSTOM_REAL) :: x1,x2,y1,y2,z1,z2
+  integer :: i,j,k,iglob1,iglob2
 
   ! initializes
   distance_min = HUGEVAL
   distance_max = -HUGEVAL
 
-  ! loops over all GLL points along X
-  do k=1,NGLLZ
-    do j=1,NGLLY
+  ! loops over all GLL points
+  ! (combines directions to speed up calculations)
+  do k=1,NGLLZ-1
+    do j=1,NGLLY-1
       do i=1,NGLLX-1
-        iglob_a = ibool(i  ,j,k,ispec)
-        iglob_b = ibool(i+1,j,k,ispec)
-        dist =       ( xstore(iglob_a) - xstore(iglob_b) )**2 &
-                   + ( ystore(iglob_a) - ystore(iglob_b) )**2 &
-                   + ( zstore(iglob_a) - zstore(iglob_b) )**2
+        ! reference point
+        iglob1 = ibool(i,j,k,ispec)
+        x1 = xstore(iglob1)
+        y1 = ystore(iglob1)
+        z1 = zstore(iglob1)
+
+        ! along X
+        iglob2 = ibool(i+1,j,k,ispec)
+        x2 = xstore(iglob2)
+        y2 = ystore(iglob2)
+        z2 = zstore(iglob2)
+
+        dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
+
         if( dist < distance_min) distance_min = dist
         if( dist > distance_max) distance_max = dist
-      enddo
-    enddo
-  enddo
 
-  ! loops over all GLL points along Y
-  do k=1,NGLLZ
-    do i=1,NGLLX
-      do j=1,NGLLY-1
-        iglob_a = ibool(i,j  ,k,ispec)
-        iglob_b = ibool(i,j+1,k,ispec)
-        dist =       ( xstore(iglob_a) - xstore(iglob_b) )**2 &
-                   + ( ystore(iglob_a) - ystore(iglob_b) )**2 &
-                   + ( zstore(iglob_a) - zstore(iglob_b) )**2
+        ! along Y
+        iglob2 = ibool(i,j+1,k,ispec)
+        x2 = xstore(iglob2)
+        y2 = ystore(iglob2)
+        z2 = zstore(iglob2)
+
+        dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
+
         if( dist < distance_min) distance_min = dist
         if( dist > distance_max) distance_max = dist
-      enddo
-    enddo
-  enddo
 
-  ! loops over all GLL points along Z
-  do i=1,NGLLX
-    do j=1,NGLLY
-      do k=1,NGLLZ-1
-        iglob_a = ibool(i,j,k  ,ispec)
-        iglob_b = ibool(i,j,k+1,ispec)
-        dist =       ( xstore(iglob_a) - xstore(iglob_b) )**2 &
-                   + ( ystore(iglob_a) - ystore(iglob_b) )**2 &
-                   + ( zstore(iglob_a) - zstore(iglob_b) )**2
+        ! along Z
+        iglob2 = ibool(i,j,k+1,ispec)
+        x2 = xstore(iglob2)
+        y2 = ystore(iglob2)
+        z2 = zstore(iglob2)
+
+        dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
+
         if( dist < distance_min) distance_min = dist
         if( dist > distance_max) distance_max = dist
+
       enddo
     enddo
   enddo
@@ -1069,8 +1173,10 @@
 
   ! local parameters
   real(kind=CUSTOM_REAL) :: dist
+  real(kind=CUSTOM_REAL) :: x1,x2,y1,y2,z1,z2
   integer :: i,j,k
-  integer :: i1,i2,j1,j2,k1,k2,ibool1,ibool2
+  integer :: i1,i2,j1,j2,k1,k2
+  integer :: iglob1,iglob2
 
   ! initializes
   elemsize_min = HUGEVAL
@@ -1079,13 +1185,21 @@
   ! loops over the four edges that are along X
   i1 = 1
   i2 = NGLLX
-  do j = 1, NGLLY, NGLLY-1
-    do k = 1, NGLLZ, NGLLZ-1
-      ibool1 = ibool(i1,j,k,ispec)
-      ibool2 = ibool(i2,j,k,ispec)
-      dist =      (xstore(ibool1) - xstore(ibool2))**2 &
-                + (ystore(ibool1) - ystore(ibool2))**2 &
-                + (zstore(ibool1) - zstore(ibool2))**2
+  do k = 1, NGLLZ, NGLLZ-1
+    do j = 1, NGLLY, NGLLY-1
+      iglob1 = ibool(i1,j,k,ispec)
+      iglob2 = ibool(i2,j,k,ispec)
+
+      x1 = xstore(iglob1)
+      y1 = ystore(iglob1)
+      z1 = zstore(iglob1)
+
+      x2 = xstore(iglob2)
+      y2 = ystore(iglob2)
+      z2 = zstore(iglob2)
+
+      dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
+
       if(dist < elemsize_min) elemsize_min = dist
       if(dist > elemsize_max) elemsize_max = dist
     enddo
@@ -1094,13 +1208,21 @@
   ! loops over the four edges that are along Y
   j1 = 1
   j2 = NGLLY
-  do i = 1, NGLLX, NGLLX-1
-    do k = 1, NGLLZ, NGLLZ-1
-      ibool1 = ibool(i,j1,k,ispec)
-      ibool2 = ibool(i,j2,k,ispec)
-      dist =      (xstore(ibool1) - xstore(ibool2))**2 &
-                + (ystore(ibool1) - ystore(ibool2))**2 &
-                + (zstore(ibool1) - zstore(ibool2))**2
+  do k = 1, NGLLZ, NGLLZ-1
+    do i = 1, NGLLX, NGLLX-1
+      iglob1 = ibool(i,j1,k,ispec)
+      iglob2 = ibool(i,j2,k,ispec)
+
+      x1 = xstore(iglob1)
+      y1 = ystore(iglob1)
+      z1 = zstore(iglob1)
+
+      x2 = xstore(iglob2)
+      y2 = ystore(iglob2)
+      z2 = zstore(iglob2)
+
+      dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
+
       if(dist < elemsize_min) elemsize_min = dist
       if(dist > elemsize_max) elemsize_max = dist
     enddo
@@ -1109,13 +1231,21 @@
   ! loops over the four edges that are along Z
   k1 = 1
   k2 = NGLLZ
-  do i = 1, NGLLX, NGLLX-1
-    do j = 1, NGLLY, NGLLY-1
-      ibool1 = ibool(i,j,k1,ispec)
-      ibool2 = ibool(i,j,k2,ispec)
-      dist =      (xstore(ibool1) - xstore(ibool2))**2 &
-                + (ystore(ibool1) - ystore(ibool2))**2 &
-                + (zstore(ibool1) - zstore(ibool2))**2
+  do j = 1, NGLLY, NGLLY-1
+    do i = 1, NGLLX, NGLLX-1
+      iglob1 = ibool(i,j,k1,ispec)
+      iglob2 = ibool(i,j,k2,ispec)
+
+      x1 = xstore(iglob1)
+      y1 = ystore(iglob1)
+      z1 = zstore(iglob1)
+
+      x2 = xstore(iglob2)
+      y2 = ystore(iglob2)
+      z2 = zstore(iglob2)
+
+      dist = (x1 - x2)*(x1 - x2) + (y1 - y2)*(y1 - y2) + (z1 - z2)*(z1 - z2)
+
       if(dist < elemsize_min) elemsize_min = dist
       if(dist > elemsize_max) elemsize_max = dist
     enddo
diff --git a/src/specfem3D/compute_kernels.f90 b/src/specfem3D/compute_kernels.f90
index f8bbdb6..8aaadd9 100644
--- a/src/specfem3D/compute_kernels.f90
+++ b/src/specfem3D/compute_kernels.f90
@@ -41,7 +41,7 @@
     call compute_kernels_el()
   endif
 
-  ! elastic simulations
+  ! acoustic simulations
   if( ACOUSTIC_SIMULATION ) then
     call compute_kernels_ac()
   endif
@@ -65,7 +65,7 @@
 
   subroutine compute_kernels_el()
 
-! kernel calculations
+! elastic kernel calculations
 ! see e.g. Tromp et al. (2005)
 
   use specfem_par
@@ -183,7 +183,7 @@
 
   subroutine compute_kernels_ac()
 
-! kernel calculations
+! acoustic kernel calculations
 ! see e.g. Tromp et al. (2005)
 
   use specfem_par
@@ -234,7 +234,9 @@
             ! density kernel
             rhol = rhostore(i,j,k,ispec)
             rho_ac_kl(i,j,k,ispec) =  rho_ac_kl(i,j,k,ispec) &
-                      + deltat * rhol * dot_product(accel_elm(:,i,j,k), b_displ_elm(:,i,j,k))
+                      + deltat * rhol * (accel_elm(1,i,j,k) * b_displ_elm(1,i,j,k) &
+                                       + accel_elm(2,i,j,k) * b_displ_elm(2,i,j,k) &
+                                       + accel_elm(3,i,j,k) * b_displ_elm(3,i,j,k))
 
             ! bulk modulus kernel
             kappal = 1._CUSTOM_REAL / kappastore(i,j,k,ispec)
@@ -487,19 +489,16 @@
   ! Computing the 21 strain products without assuming eps(i)*b_eps(j) = eps(j)*b_eps(i)
   p=1
   do i=1,6
-       do j=i,6
-       prod(p)=eps(i)*b_eps(j)
-       if(j>i) then
-            prod(p)=prod(p)+eps(j)*b_eps(i)
-            if(j>3 .and. i<4) prod(p)=prod(p)*2
-       endif
-       if(i>3) prod(p)=prod(p)*4
-       p=p+1
-       enddo
+    do j=i,6
+      prod(p)=eps(i)*b_eps(j)
+      if(j>i) then
+        prod(p)=prod(p)+eps(j)*b_eps(i)
+        if(j>3 .and. i<4) prod(p)=prod(p)*2
+      endif
+      if(i>3) prod(p)=prod(p)*4
+      p=p+1
+    enddo
   enddo
 
   end subroutine compute_strain_product
 
-!
-!-------------------------------------------------------------------------------------------------
-
diff --git a/src/specfem3D/prepare_timerun.F90 b/src/specfem3D/prepare_timerun.F90
index a17aa0e..b3c0190 100644
--- a/src/specfem3D/prepare_timerun.F90
+++ b/src/specfem3D/prepare_timerun.F90
@@ -118,10 +118,11 @@
 
     !daniel debug: total time estimation
     !  average time per element per time step:
-    !     elastic elements    ~ dt = 1.40789368e-05 s
+    !     elastic elements    ~ dt = 1.17e-05 s (intel xeon 2.6GHz, stand 2013)
+    !                              = 3.18e-07 s (Kepler K20x, stand 2013)
     !
     !  total time per time step:
-    !     T_total = dt * nspec
+    !     T_total = dt * nspec_total
     !
     !  total time using nproc processes (slices) for NSTEP time steps:
     !     T_simulation = T_total * NSTEP / nproc



More information about the CIG-COMMITS mailing list