[cig-commits] [commit] devel: updates smoothing and interpolation tool; renames file smooth_weights_vec.f90 to smooth_weights_vec.F90 and adds vectorizations (df001a0)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Thu Dec 11 07:35:43 PST 2014
Repository : https://github.com/geodynamics/specfem3d_globe
On branch : devel
Link : https://github.com/geodynamics/specfem3d_globe/compare/47b844e0941e7031b9b9330a3ed6e3a5e6b6407a...58e8efb773c4727b4a02f43d1ddc2cde8680f383
>---------------------------------------------------------------
commit df001a0119ba5ea9ef376eb3a2e9cd8092e881df
Author: daniel peter <peterda at ethz.ch>
Date: Wed Dec 10 15:42:27 2014 +0100
updates smoothing and interpolation tool; renames file smooth_weights_vec.f90 to smooth_weights_vec.F90 and adds vectorizations
>---------------------------------------------------------------
df001a0119ba5ea9ef376eb3a2e9cd8092e881df
...ooth_weights_vec.f90 => smooth_weights_vec.F90} | 106 +++++++++++++--------
src/tomography/interpolate_model.F90 | 64 +++++++------
src/tomography/smooth_sem.f90 | 2 +-
3 files changed, 103 insertions(+), 69 deletions(-)
diff --git a/src/shared/smooth_weights_vec.f90 b/src/shared/smooth_weights_vec.F90
similarity index 62%
rename from src/shared/smooth_weights_vec.f90
rename to src/shared/smooth_weights_vec.F90
index abb76a0..78f002d 100644
--- a/src/shared/smooth_weights_vec.f90
+++ b/src/shared/smooth_weights_vec.F90
@@ -25,13 +25,16 @@
!
!=====================================================================
+! we switch between vectorized and non-vectorized version by using pre-processor flag FORCE_VECTORIZATION
+! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
+#include "config.fh"
subroutine smoothing_weights_vec(x0,y0,z0,sigma_h2,sigma_v2,exp_val,&
xx_elem,yy_elem,zz_elem)
- use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ
+ use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,NGLLCUBE
implicit none
@@ -40,14 +43,21 @@
real(kind=CUSTOM_REAL),intent(in) :: x0,y0,z0,sigma_h2,sigma_v2
! local parameters
- integer :: ii,jj,kk
real(kind=CUSTOM_REAL) :: dist_h,dist_v
real(kind=CUSTOM_REAL) :: r0,r1
real(kind=CUSTOM_REAL) :: theta,ratio
real(kind=CUSTOM_REAL) :: x1,y1,z1
-
- ! explicit initialization
- !exp_val(:,:,:) = 0.0_CUSTOM_REAL
+ real(kind=CUSTOM_REAL) :: sigma_h2_inv,sigma_v2_inv
+ real(kind=CUSTOM_REAL) :: val
+
+#ifdef FORCE_VECTORIZATION
+! in this vectorized version we have to assume that N_SLS == 3 in order to be able to unroll and thus suppress
+! an inner loop that would otherwise prevent vectorization; this is safe in practice in all cases because N_SLS == 3
+! in all known applications, and in the main program we check that N_SLS == 3 if FORCE_VECTORIZATION is used and we stop
+ integer :: ijk
+#else
+ integer :: i,j,k
+#endif
! >>>>>
! uniform sigma
@@ -61,50 +71,68 @@
! -(zz(:,:,:,ispec2)-z0)**2/(sigma_v2) ) * factor(:,:,:)
! >>>>>
- do kk = 1, NGLLZ
- do jj = 1, NGLLY
- do ii = 1, NGLLX
- ! point in second slice
+ ! helper variables
+ sigma_h2_inv = 1.0_CUSTOM_REAL / sigma_h2
+ sigma_v2_inv = 1.0_CUSTOM_REAL / sigma_v2
+
+ ! length of first position vector
+ r0 = sqrt( x0*x0 + y0*y0 + z0*z0 )
+
+ DO_LOOP_IJK
+
+ ! point in second slice
+ x1 = xx_elem(INDEX_IJK)
+ y1 = yy_elem(INDEX_IJK)
+ z1 = zz_elem(INDEX_IJK)
+
+ ! without explicit function calls to help compiler optimize loops
+
+ ! length of position vector
+ r1 = sqrt( x1*x1 + y1*y1 + z1*z1 )
+
+ ! vertical distance (squared)
+ dist_v = (r1 - r0)*(r1 - r0)
- ! without explicit function calls to help compiler optimize loops
+ ! only for flat earth with z in depth: dist_v = sqrt( (cz(ispec2)-cz0(ispec))** 2)
- x1 = xx_elem(ii,jj,kk)
- y1 = yy_elem(ii,jj,kk)
- z1 = zz_elem(ii,jj,kk)
+ ! epicentral distance
+ ! (accounting for spherical curvature)
+ ! calculates distance of circular segment
+ ! angle between r0 and r1 in radian
+ ! given by dot-product of two vectors
+ ratio = (x0*x1 + y0*y1 + z0*z1) / (r0 * r1)
- ! vertical distance (squared)
- r0 = sqrt( x0*x0 + y0*y0 + z0*z0 ) ! length of first position vector
- r1 = sqrt( x1*x1 + y1*y1 + z1*z1 )
- dist_v = (r1 - r0)*(r1 - r0)
+ ! checks boundaries of ratio (due to numerical inaccuracies)
+ if (ratio > 1.0_CUSTOM_REAL) then
+ ratio = 1.0_CUSTOM_REAL
+ else if (ratio < -1.0_CUSTOM_REAL) then
+ ratio = -1.0_CUSTOM_REAL
+ endif
- ! only for flat earth with z in depth: dist_v = sqrt( (cz(ispec2)-cz0(ispec))** 2)
+ theta = acos( ratio )
- ! epicentral distance
- ! (accounting for spherical curvature)
- ! calculates distance of circular segment
- ! angle between r0 and r1 in radian
- ! given by dot-product of two vectors
- ratio = (x0*x1 + y0*y1 + z0*z1)/(r0 * r1)
+ ! segment length at heigth of r1 (squared)
+ dist_h = (r1 * theta)*(r1 * theta)
- ! checks boundaries of ratio (due to numerical inaccuracies)
- if (ratio > 1.0_CUSTOM_REAL) ratio = 1.0_CUSTOM_REAL
- if (ratio < -1.0_CUSTOM_REAL) ratio = -1.0_CUSTOM_REAL
+ ! Gaussian function
+ val = - dist_h*sigma_h2_inv - dist_v*sigma_v2_inv
- theta = acos( ratio )
+ !exp_val(i,j,k) = exp(val) ! * factor(i,j,k)
- ! segment length at heigth of r1 (squared)
- dist_h = (r1 * theta)*(r1 * theta)
+ ! limits to single precision
+ if (val < - 86.0) then
+ ! smaller than numerical precision: exp(-86) < 1.e-37
+ exp_val(INDEX_IJK) = 0.0_CUSTOM_REAL
+ else
+ exp_val(INDEX_IJK) = exp(val) ! * factor(i,j,k)
+ endif
- ! Gaussian function
- exp_val(ii,jj,kk) = exp( - dist_h/sigma_h2 - dist_v/sigma_v2 ) ! * factor(ii,jj,kk)
+ ! debug
+ !if (debug) then
+ ! print*,i,j,k,'smoothing:',dist_v,dist_h,sigma_h2,sigma_v2,ratio,theta,'val',- dist_h/sigma_h2 - dist_v/sigma_v2
+ !endif
- ! debug
- !if (debug) then
- ! print*,ii,jj,kk,'smoothing:',dist_v,dist_h,sigma_h2,sigma_v2,ratio,theta,'val',- dist_h/sigma_h2 - dist_v/sigma_v2
- !endif
- enddo
- enddo
- enddo
+ ENDDO_LOOP_IJK
end subroutine smoothing_weights_vec
diff --git a/src/tomography/interpolate_model.F90 b/src/tomography/interpolate_model.F90
index 494067a..6d34439 100644
--- a/src/tomography/interpolate_model.F90
+++ b/src/tomography/interpolate_model.F90
@@ -1349,9 +1349,16 @@
! interpolate model values
do iker = 1,nparams
- call interpolate_limited(xi,eta,gamma,i_selected,j_selected,k_selected,ispec_selected, &
- nspec_max_old,model1(:,:,:,:,iker,rank_selected), &
- val,xigll,yigll,zigll,USE_FALLBACK)
+ if (USE_FALLBACK) then
+ call interpolate_limited(xi,eta,gamma,ispec_selected, &
+ nspec_max_old,model1(:,:,:,:,iker,rank_selected), &
+ val,xigll,yigll,zigll, &
+ i_selected,j_selected,k_selected)
+ else
+ call interpolate(xi,eta,gamma,ispec_selected, &
+ nspec_max_old,model1(:,:,:,:,iker,rank_selected), &
+ val,xigll,yigll,zigll)
+ endif
! sets new model value
model2(i,j,k,ispec,iker) = val
@@ -1902,11 +1909,10 @@
!------------------------------------------------------------------------------
!
- subroutine interpolate_limited(xi,eta,gamma, &
- i_selected,j_selected,k_selected,ielem, &
+ subroutine interpolate_limited(xi,eta,gamma,ielem, &
nspec,model, &
- val, &
- xigll,yigll,zigll,USE_FALLBACK)
+ val,xigll,yigll,zigll, &
+ i_selected,j_selected,k_selected)
use constants, only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ
@@ -1914,7 +1920,7 @@
implicit none
double precision,intent(in):: xi,eta,gamma
- integer,intent(in):: i_selected,j_selected,k_selected,ielem
+ integer,intent(in):: ielem
integer,intent(in):: nspec
real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,nspec),intent(in) :: model
@@ -1926,7 +1932,7 @@
double precision, dimension(NGLLY),intent(in) :: yigll
double precision, dimension(NGLLZ),intent(in) :: zigll
- logical,intent(in) :: USE_FALLBACK
+ integer,intent(in):: i_selected,j_selected,k_selected
! local parameters
double precision :: hxir(NGLLX), hpxir(NGLLX), hetar(NGLLY), hpetar(NGLLY), &
@@ -1935,7 +1941,7 @@
real(kind=CUSTOM_REAL) :: val_initial,val_avg,pert,pert_limit
! percentage
- real(kind=CUSTOM_REAL), parameter :: PERCENTAGE_LIMIT = 0.05
+ real(kind=CUSTOM_REAL), parameter :: PERCENTAGE_LIMIT = 0.01
! interpolation weights
call lagrange_any(xi,NGLLX,xigll,hxir,hpxir)
@@ -1947,7 +1953,7 @@
do k = 1, NGLLZ
do j = 1, NGLLY
do i = 1, NGLLX
- val = val + hxir(i) * hetar(j) * hgammar(k) * model(i,j,k,ielem)
+ val = val + hxir(i) * hetar(j) * hgammar(k) * model(i,j,k,ielem)
enddo
enddo
enddo
@@ -1955,24 +1961,24 @@
! note: interpolation of values close to the surface or 3D moho encounters problems;
! this is a fall-back to the closest point value
!
- ! uses average/closest point value if too far off (by more than 5%)
- if (USE_FALLBACK) then
- ! average value
- val_avg = sum(model(:,:,:,ielem)) / NGLLX / NGLLY / NGLLZ
- ! closest point value
- val_initial = model(i_selected,j_selected,k_selected,ielem)
-
- ! initial difference
- pert = abs(val_initial - val_avg)
-
- ! upper/lower perturbation bound
- pert_limit = PERCENTAGE_LIMIT * abs(val_avg)
- if (pert > pert_limit) pert_limit = pert
-
- ! within a certain percentage range
- if (abs(val - val_avg ) > pert_limit) then
- val = val_initial
- endif
+ ! uses average/closest point value if too far off
+
+ ! closest point value
+ val_initial = model(i_selected,j_selected,k_selected,ielem)
+
+ ! average value
+ val_avg = sum(model(:,:,:,ielem)) / NGLLX / NGLLY / NGLLZ
+
+ ! initial difference
+ pert = abs(val_initial - val_avg)
+
+ ! upper/lower perturbation bound
+ pert_limit = PERCENTAGE_LIMIT * abs(val_avg)
+ if (pert > pert_limit) pert_limit = pert
+
+ ! within a certain percentage range
+ if (abs(val - val_avg ) > pert_limit) then
+ val = val_initial
endif
end subroutine interpolate_limited
diff --git a/src/tomography/smooth_sem.f90 b/src/tomography/smooth_sem.f90
index ea80d02..17b02b4 100644
--- a/src/tomography/smooth_sem.f90
+++ b/src/tomography/smooth_sem.f90
@@ -152,7 +152,7 @@ program smooth_sem_globe
! USER PARAMETERS
! number of steps to reach 100 percent, i.e. 10 outputs info for every 10 percent
- integer,parameter :: NSTEP_PERCENT_INFO = 10
+ integer,parameter :: NSTEP_PERCENT_INFO = 100
! brute-force search for neighboring elements, if set to .false. a kd-tree search will be used
logical,parameter :: DO_BRUTE_FORCE_SEARCH = .false.
More information about the CIG-COMMITS
mailing list