[cig-commits] [commit] devel: updates smoothing and interpolation tool; renames file smooth_weights_vec.f90 to smooth_weights_vec.F90 and adds vectorizations (df001a0)

Thu Dec 11 07:35:43 PST 2014

Repository : https://github.com/geodynamics/specfem3d_globe

On branch  : devel
Link       : https://github.com/geodynamics/specfem3d_globe/compare/47b844e0941e7031b9b9330a3ed6e3a5e6b6407a...58e8efb773c4727b4a02f43d1ddc2cde8680f383

>---------------------------------------------------------------

commit df001a0119ba5ea9ef376eb3a2e9cd8092e881df
Author: daniel peter <peterda at ethz.ch>
Date:   Wed Dec 10 15:42:27 2014 +0100

    updates smoothing and interpolation tool; renames file smooth_weights_vec.f90 to smooth_weights_vec.F90 and adds vectorizations


>---------------------------------------------------------------

df001a0119ba5ea9ef376eb3a2e9cd8092e881df
 ...ooth_weights_vec.f90 => smooth_weights_vec.F90} | 106 +++++++++++++--------
 src/tomography/interpolate_model.F90               |  64 +++++++------
 src/tomography/smooth_sem.f90                      |   2 +-
 3 files changed, 103 insertions(+), 69 deletions(-)

diff --git a/src/shared/smooth_weights_vec.f90 b/src/shared/smooth_weights_vec.F90
similarity index 62%
rename from src/shared/smooth_weights_vec.f90
rename to src/shared/smooth_weights_vec.F90
index abb76a0..78f002d 100644
--- a/src/shared/smooth_weights_vec.f90
+++ b/src/shared/smooth_weights_vec.F90
@@ -25,13 +25,16 @@
 !
 !=====================================================================
 
+! we switch between vectorized and non-vectorized version by using pre-processor flag FORCE_VECTORIZATION
+! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
+#include "config.fh"
 
 
   subroutine smoothing_weights_vec(x0,y0,z0,sigma_h2,sigma_v2,exp_val,&
                                    xx_elem,yy_elem,zz_elem)
 
 
-  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ
+  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NGLLCUBE
 
   implicit none
 
@@ -40,14 +43,21 @@
   real(kind=CUSTOM_REAL),intent(in) :: x0,y0,z0,sigma_h2,sigma_v2
 
   ! local parameters
-  integer :: ii,jj,kk
   real(kind=CUSTOM_REAL) :: dist_h,dist_v
   real(kind=CUSTOM_REAL) :: r0,r1
   real(kind=CUSTOM_REAL) :: theta,ratio
   real(kind=CUSTOM_REAL) :: x1,y1,z1
-
-  ! explicit initialization
-  !exp_val(:,:,:) = 0.0_CUSTOM_REAL
+  real(kind=CUSTOM_REAL) :: sigma_h2_inv,sigma_v2_inv
+  real(kind=CUSTOM_REAL) :: val
+
+#ifdef FORCE_VECTORIZATION
+! in this vectorized version we have to assume that N_SLS == 3 in order to be able to unroll and thus suppress
+! an inner loop that would otherwise prevent vectorization; this is safe in practice in all cases because N_SLS == 3
+! in all known applications, and in the main program we check that N_SLS == 3 if FORCE_VECTORIZATION is used and we stop
+  integer :: ijk
+#else
+  integer :: i,j,k
+#endif
 
   ! >>>>>
   ! uniform sigma
@@ -61,50 +71,68 @@
   !                      -(zz(:,:,:,ispec2)-z0)**2/(sigma_v2) ) * factor(:,:,:)
   ! >>>>>
 
-  do kk = 1, NGLLZ
-    do jj = 1, NGLLY
-      do ii = 1, NGLLX
-        ! point in second slice
+  ! helper variables
+  sigma_h2_inv = 1.0_CUSTOM_REAL / sigma_h2
+  sigma_v2_inv = 1.0_CUSTOM_REAL / sigma_v2
+
+  ! length of first position vector
+  r0 = sqrt( x0*x0 + y0*y0 + z0*z0 )
+
+  DO_LOOP_IJK
+
+    ! point in second slice
+    x1 = xx_elem(INDEX_IJK)
+    y1 = yy_elem(INDEX_IJK)
+    z1 = zz_elem(INDEX_IJK)
+
+    ! without explicit function calls to help compiler optimize loops
+
+    ! length of position vector
+    r1 = sqrt( x1*x1 + y1*y1 + z1*z1 )
+
+    ! vertical distance (squared)
+    dist_v = (r1 - r0)*(r1 - r0)
 
-        ! without explicit function calls to help compiler optimize loops
+    ! only for flat earth with z in depth: dist_v = sqrt( (cz(ispec2)-cz0(ispec))** 2)
 
-        x1 = xx_elem(ii,jj,kk)
-        y1 = yy_elem(ii,jj,kk)
-        z1 = zz_elem(ii,jj,kk)
+    ! epicentral distance
+    ! (accounting for spherical curvature)
+    ! calculates distance of circular segment
+    ! angle between r0 and r1 in radian
+    ! given by dot-product of two vectors
+    ratio = (x0*x1 + y0*y1 + z0*z1) / (r0 * r1)
 
-        ! vertical distance (squared)
-        r0 = sqrt( x0*x0 + y0*y0 + z0*z0 ) ! length of first position vector
-        r1 = sqrt( x1*x1 + y1*y1 + z1*z1 )
-        dist_v = (r1 - r0)*(r1 - r0)
+    ! checks boundaries of ratio (due to numerical inaccuracies)
+    if (ratio > 1.0_CUSTOM_REAL) then
+      ratio = 1.0_CUSTOM_REAL
+    else if (ratio < -1.0_CUSTOM_REAL) then
+      ratio = -1.0_CUSTOM_REAL
+    endif
 
-        ! only for flat earth with z in depth: dist_v = sqrt( (cz(ispec2)-cz0(ispec))** 2)
+    theta = acos( ratio )
 
-        ! epicentral distance
-        ! (accounting for spherical curvature)
-        ! calculates distance of circular segment
-        ! angle between r0 and r1 in radian
-        ! given by dot-product of two vectors
-        ratio = (x0*x1 + y0*y1 + z0*z1)/(r0 * r1)
+    ! segment length at heigth of r1 (squared)
+    dist_h = (r1 * theta)*(r1 * theta)
 
-        ! checks boundaries of ratio (due to numerical inaccuracies)
-        if (ratio > 1.0_CUSTOM_REAL) ratio = 1.0_CUSTOM_REAL
-        if (ratio < -1.0_CUSTOM_REAL) ratio = -1.0_CUSTOM_REAL
+    ! Gaussian function
+    val = - dist_h*sigma_h2_inv - dist_v*sigma_v2_inv
 
-        theta = acos( ratio )
+    !exp_val(i,j,k) = exp(val)    ! * factor(i,j,k)
 
-        ! segment length at heigth of r1 (squared)
-        dist_h = (r1 * theta)*(r1 * theta)
+    ! limits to single precision
+    if (val < - 86.0) then
+      ! smaller than numerical precision: exp(-86) < 1.e-37
+      exp_val(INDEX_IJK) = 0.0_CUSTOM_REAL
+    else
+      exp_val(INDEX_IJK) = exp(val)    ! * factor(i,j,k)
+    endif
 
-        ! Gaussian function
-        exp_val(ii,jj,kk) = exp( - dist_h/sigma_h2 - dist_v/sigma_v2 )    ! * factor(ii,jj,kk)
+    ! debug
+    !if (debug) then
+    !  print*,i,j,k,'smoothing:',dist_v,dist_h,sigma_h2,sigma_v2,ratio,theta,'val',- dist_h/sigma_h2 - dist_v/sigma_v2
+    !endif
 
-        ! debug
-        !if (debug) then
-        !  print*,ii,jj,kk,'smoothing:',dist_v,dist_h,sigma_h2,sigma_v2,ratio,theta,'val',- dist_h/sigma_h2 - dist_v/sigma_v2
-        !endif
-      enddo
-    enddo
-  enddo
+  ENDDO_LOOP_IJK
 
   end subroutine smoothing_weights_vec
 
diff --git a/src/tomography/interpolate_model.F90 b/src/tomography/interpolate_model.F90
index 494067a..6d34439 100644
--- a/src/tomography/interpolate_model.F90
+++ b/src/tomography/interpolate_model.F90
@@ -1349,9 +1349,16 @@
 
         ! interpolate model values
         do iker = 1,nparams
-          call interpolate_limited(xi,eta,gamma,i_selected,j_selected,k_selected,ispec_selected, &
-                                   nspec_max_old,model1(:,:,:,:,iker,rank_selected), &
-                                   val,xigll,yigll,zigll,USE_FALLBACK)
+          if (USE_FALLBACK) then
+            call interpolate_limited(xi,eta,gamma,ispec_selected, &
+                                     nspec_max_old,model1(:,:,:,:,iker,rank_selected), &
+                                     val,xigll,yigll,zigll, &
+                                     i_selected,j_selected,k_selected)
+          else
+            call interpolate(xi,eta,gamma,ispec_selected, &
+                             nspec_max_old,model1(:,:,:,:,iker,rank_selected), &
+                             val,xigll,yigll,zigll)
+          endif
 
           ! sets new model value
           model2(i,j,k,ispec,iker) = val
@@ -1902,11 +1909,10 @@
 !------------------------------------------------------------------------------
 !
 
-  subroutine interpolate_limited(xi,eta,gamma, &
-                                 i_selected,j_selected,k_selected,ielem, &
+  subroutine interpolate_limited(xi,eta,gamma,ielem, &
                                  nspec,model, &
-                                 val, &
-                                 xigll,yigll,zigll,USE_FALLBACK)
+                                 val,xigll,yigll,zigll, &
+                                 i_selected,j_selected,k_selected)
 
 
   use constants, only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ
@@ -1914,7 +1920,7 @@
   implicit none
 
   double precision,intent(in):: xi,eta,gamma
-  integer,intent(in):: i_selected,j_selected,k_selected,ielem
+  integer,intent(in):: ielem
 
   integer,intent(in):: nspec
   real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: model
@@ -1926,7 +1932,7 @@
   double precision, dimension(NGLLY),intent(in) :: yigll
   double precision, dimension(NGLLZ),intent(in) :: zigll
 
-  logical,intent(in) :: USE_FALLBACK
+  integer,intent(in):: i_selected,j_selected,k_selected
 
   ! local parameters
   double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY), &
@@ -1935,7 +1941,7 @@
   real(kind=CUSTOM_REAL) :: val_initial,val_avg,pert,pert_limit
 
   ! percentage
-  real(kind=CUSTOM_REAL), parameter :: PERCENTAGE_LIMIT = 0.05
+  real(kind=CUSTOM_REAL), parameter :: PERCENTAGE_LIMIT = 0.01
 
   ! interpolation weights
   call lagrange_any(xi,NGLLX,xigll,hxir,hpxir)
@@ -1947,7 +1953,7 @@
   do k = 1, NGLLZ
     do j = 1, NGLLY
       do i = 1, NGLLX
-          val = val +  hxir(i) * hetar(j) * hgammar(k) * model(i,j,k,ielem)
+        val = val + hxir(i) * hetar(j) * hgammar(k) * model(i,j,k,ielem)
       enddo
     enddo
   enddo
@@ -1955,24 +1961,24 @@
   ! note: interpolation of values close to the surface or 3D moho encounters problems;
   !       this is a fall-back to the closest point value
   !
-  ! uses average/closest point value if too far off (by more than 5%)
-  if (USE_FALLBACK) then
-    ! average value
-    val_avg = sum(model(:,:,:,ielem)) / NGLLX / NGLLY / NGLLZ
-    ! closest point value
-    val_initial = model(i_selected,j_selected,k_selected,ielem)
-
-    ! initial difference
-    pert = abs(val_initial - val_avg)
-
-    ! upper/lower perturbation bound
-    pert_limit = PERCENTAGE_LIMIT * abs(val_avg)
-    if (pert > pert_limit) pert_limit = pert
-
-    ! within a certain percentage range
-    if (abs(val - val_avg ) > pert_limit) then
-      val = val_initial
-    endif
+  ! uses average/closest point value if too far off
+
+  ! closest point value
+  val_initial = model(i_selected,j_selected,k_selected,ielem)
+
+  ! average value
+  val_avg = sum(model(:,:,:,ielem)) / NGLLX / NGLLY / NGLLZ
+
+  ! initial difference
+  pert = abs(val_initial - val_avg)
+
+  ! upper/lower perturbation bound
+  pert_limit = PERCENTAGE_LIMIT * abs(val_avg)
+  if (pert > pert_limit) pert_limit = pert
+
+  ! within a certain percentage range
+  if (abs(val - val_avg ) > pert_limit) then
+    val = val_initial
   endif
 
   end subroutine interpolate_limited
diff --git a/src/tomography/smooth_sem.f90 b/src/tomography/smooth_sem.f90
index ea80d02..17b02b4 100644
--- a/src/tomography/smooth_sem.f90
+++ b/src/tomography/smooth_sem.f90
@@ -152,7 +152,7 @@ program smooth_sem_globe
   ! USER PARAMETERS
 
   ! number of steps to reach 100 percent, i.e. 10 outputs info for every 10 percent
-  integer,parameter :: NSTEP_PERCENT_INFO = 10
+  integer,parameter :: NSTEP_PERCENT_INFO = 100
 
   ! brute-force search for neighboring elements, if set to .false. a kd-tree search will be used
   logical,parameter :: DO_BRUTE_FORCE_SEARCH = .false.