[cig-commits] [commit] devel, master: Combine duplicate get_global subroutines. (0d35ab5)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Thu Nov 6 08:20:10 PST 2014
Repository : https://github.com/geodynamics/specfem3d_globe
On branches: devel,master
Link : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f
>---------------------------------------------------------------
commit 0d35ab5f73fc93476daaf2b51297a6c683dd7426
Author: Elliott Sales de Andrade <esalesde at physics.utoronto.ca>
Date: Sun Jun 1 01:06:04 2014 -0400
Combine duplicate get_global subroutines.
These are all the same save for one loop bound. But that nested loop can
be re-written as a single loop that works for all cases.
>---------------------------------------------------------------
0d35ab5f73fc93476daaf2b51297a6c683dd7426
src/auxiliaries/create_movie_AVS_DX.f90 | 230 +------------------------------
src/auxiliaries/rules.mk | 1 +
src/meshfem3D/create_regions_mesh.F90 | 2 +-
src/meshfem3D/rules.mk | 2 +-
src/{meshfem3D => shared}/get_global.f90 | 189 +++++++++++++------------
src/shared/rules.mk | 1 +
6 files changed, 104 insertions(+), 321 deletions(-)
diff --git a/src/auxiliaries/create_movie_AVS_DX.f90 b/src/auxiliaries/create_movie_AVS_DX.f90
index 5d93e71..5e8a0f2 100644
--- a/src/auxiliaries/create_movie_AVS_DX.f90
+++ b/src/auxiliaries/create_movie_AVS_DX.f90
@@ -610,7 +610,7 @@
!--- sort the list based upon coordinates to get rid of multiples
print *,'sorting list of points'
- call get_global_AVS(nspectot_AVS_max,xp,yp,zp,iglob,locval,ifseg,nglob,npointot)
+ call get_global(npointot,xp,yp,zp,iglob,locval,ifseg,nglob)
!--- print total number of points found
print *
@@ -861,231 +861,3 @@
end subroutine read_AVS_DX_parameters
-!
-!=====================================================================
-!
-
-
- subroutine get_global_AVS(nspec,xp,yp,zp,iglob,locval,ifseg,nglob,npointot)
-
-! this routine MUST be in double precision to avoid sensitivity
-! to roundoff errors in the coordinates of the points
-
-! leave sorting subroutines in same source file to allow for inlining
-
- use constants
-
- implicit none
-
- integer npointot
- integer iglob(npointot),locval(npointot)
- logical ifseg(npointot)
- double precision xp(npointot),yp(npointot),zp(npointot)
- integer nspec,nglob
-
- integer ispec,i,j
- integer ieoff,ilocnum,nseg,ioff,iseg,ig
-
-! for dynamic memory allocation
- integer ierror
-
- integer, dimension(:), allocatable :: ind,ninseg,iwork
- double precision, dimension(:), allocatable :: work
-
- print *
- print *,'Allocating arrays of size ',npointot
- print *
-
-! dynamically allocate arrays
- allocate(ind(npointot),stat=ierror)
- if(ierror /= 0) stop 'error while allocating ind'
-
- allocate(ninseg(npointot),stat=ierror)
- if(ierror /= 0) stop 'error while allocating ninseg'
-
- allocate(iwork(npointot),stat=ierror)
- if(ierror /= 0) stop 'error while allocating iwork'
-
- allocate(work(npointot),stat=ierror)
- if(ierror /= 0) stop 'error while allocating work'
-
-! establish initial pointers
- do ispec=1,nspec
- ieoff=NGNOD2D_AVS_DX*(ispec-1)
- do ilocnum=1,NGNOD2D_AVS_DX
- locval(ieoff+ilocnum)=ieoff+ilocnum
- enddo
- enddo
-
- ifseg(:)=.false.
-
- nseg=1
- ifseg(1)=.true.
- ninseg(1)=npointot
-
- do j=1,NDIM
-
-! sort within each segment
- ioff=1
- do iseg=1,nseg
- if(j == 1) then
- call rank(xp(ioff),ind,ninseg(iseg))
- else if(j == 2) then
- call rank(yp(ioff),ind,ninseg(iseg))
- else
- call rank(zp(ioff),ind,ninseg(iseg))
- endif
- call swap_all(locval(ioff),xp(ioff),yp(ioff),zp(ioff),iwork,work,ind,ninseg(iseg))
- ioff=ioff+ninseg(iseg)
- enddo
-
-! check for jumps in current coordinate
-! compare the coordinates of the points within a small tolerance
- if(j == 1) then
- do i=2,npointot
- if(dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
- enddo
- else if(j == 2) then
- do i=2,npointot
- if(dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
- enddo
- else
- do i=2,npointot
- if(dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
- enddo
- endif
-
-! count up number of different segments
- nseg=0
- do i=1,npointot
- if(ifseg(i)) then
- nseg=nseg+1
- ninseg(nseg)=1
- else
- ninseg(nseg)=ninseg(nseg)+1
- endif
- enddo
- enddo
-
-! assign global node numbers (now sorted lexicographically)
- ig=0
- do i=1,npointot
- if(ifseg(i)) ig=ig+1
- iglob(locval(i))=ig
- enddo
-
- nglob=ig
-
-! deallocate arrays
- deallocate(ind)
- deallocate(ninseg)
- deallocate(iwork)
- deallocate(work)
-
-! -----------------------------------
-
-! get_global_AVS internal procedures follow
-
-! sorting routines put in same file to allow for inlining
-
- contains
-
-! -----------------------------------
-
- subroutine rank(A,IND,N)
-!
-! Use Heap Sort (Numerical Recipes)
-!
- implicit none
-
- integer n
- double precision A(n)
- integer IND(n)
-
- integer i,j,l,ir,indx
- double precision q
-
- do j=1,n
- IND(j)=j
- enddo
-
- if (n == 1) return
-
- L=n/2+1
- ir=n
- 100 CONTINUE
- IF (l>1) THEN
- l=l-1
- indx=ind(l)
- q=a(indx)
- ELSE
- indx=ind(ir)
- q=a(indx)
- ind(ir)=ind(1)
- ir=ir-1
- if (ir == 1) then
- ind(1)=indx
- return
- endif
- endif
- i=l
- j=l+l
- 200 CONTINUE
- IF (J <= IR) THEN
- IF (J<IR) THEN
- IF ( A(IND(j))<A(IND(j+1)) ) j=j+1
- endif
- IF (q<A(IND(j))) THEN
- IND(I)=IND(J)
- I=J
- J=J+J
- ELSE
- J=IR+1
- endif
- goto 200
- endif
- IND(I)=INDX
- goto 100
- end subroutine rank
-
-! ------------------------------------------------------------------
-
- subroutine swap_all(IA,A,B,C,IW,W,ind,n)
-!
-! swap arrays IA, A, B and C according to addressing in array IND
-!
- implicit none
-
- integer n
-
- integer IND(n)
- integer IA(n),IW(n)
- double precision A(n),B(n),C(n),W(n)
-
- integer i
-
- IW(:) = IA(:)
- W(:) = A(:)
-
- do i=1,n
- IA(i)=IW(ind(i))
- A(i)=W(ind(i))
- enddo
-
- W(:) = B(:)
-
- do i=1,n
- B(i)=W(ind(i))
- enddo
-
- W(:) = C(:)
-
- do i=1,n
- C(i)=W(ind(i))
- enddo
-
- end subroutine swap_all
-
-! ------------------------------------------------------------------
-
- end subroutine get_global_AVS
diff --git a/src/auxiliaries/rules.mk b/src/auxiliaries/rules.mk
index cb1d2e6..583b616 100644
--- a/src/auxiliaries/rules.mk
+++ b/src/auxiliaries/rules.mk
@@ -67,6 +67,7 @@ auxiliaries_SHARED_OBJECTS = \
$O/count_points.shared.o \
$O/create_serial_name_database.shared.o \
$O/define_all_layers.shared.o \
+ $O/get_global.shared.o \
$O/get_model_parameters.shared.o \
$O/get_timestep_and_layers.shared.o \
$O/get_value_parameters.shared.o \
diff --git a/src/meshfem3D/create_regions_mesh.F90 b/src/meshfem3D/create_regions_mesh.F90
index 89c605d..ba8b783 100644
--- a/src/meshfem3D/create_regions_mesh.F90
+++ b/src/meshfem3D/create_regions_mesh.F90
@@ -1187,7 +1187,7 @@
enddo
enddo
- call get_global(nspec,xp,yp,zp,ibool,locval,ifseg,nglob,npointot)
+ call get_global(npointot,xp,yp,zp,ibool,locval,ifseg,nglob)
deallocate(xp,yp,zp)
deallocate(locval,ifseg)
diff --git a/src/meshfem3D/rules.mk b/src/meshfem3D/rules.mk
index 8c3ee5d..500fc69 100644
--- a/src/meshfem3D/rules.mk
+++ b/src/meshfem3D/rules.mk
@@ -62,7 +62,6 @@ meshfem3D_OBJECTS = \
$O/fix_non_blocking_flags.check.o \
$O/get_absorb.check.o \
$O/get_ellipticity.check.o \
- $O/get_global.check.o \
$O/get_jacobian_boundaries.check.o \
$O/get_jacobian_discontinuities.check.o \
$O/get_model.check.o \
@@ -165,6 +164,7 @@ meshfem3D_SHARED_OBJECTS = \
$O/euler_angles.shared.o \
$O/exit_mpi.shared.o \
$O/force_ftz.cc.o \
+ $O/get_global.shared.o \
$O/get_model_parameters.shared.o \
$O/get_timestep_and_layers.shared.o \
$O/get_value_parameters.shared.o \
diff --git a/src/meshfem3D/get_global.f90 b/src/shared/get_global.f90
similarity index 84%
rename from src/meshfem3D/get_global.f90
rename to src/shared/get_global.f90
index ee356ef..1d3e294 100644
--- a/src/meshfem3D/get_global.f90
+++ b/src/shared/get_global.f90
@@ -25,7 +25,7 @@
!
!=====================================================================
- subroutine get_global(nspec,xp,yp,zp,iglob,locval,ifseg,nglob,npointot)
+ subroutine get_global(npointot,xp,yp,zp,iglob,locval,ifseg,nglob)
! this routine MUST be in double precision to avoid sensitivity
! to roundoff errors in the coordinates of the points
@@ -39,7 +39,7 @@
implicit none
! input parameters
- integer, intent(in) :: npointot,nspec
+ integer, intent(in) :: npointot
double precision, dimension(npointot), intent(in) :: xp,yp,zp
@@ -50,23 +50,25 @@
! local variables
double precision, dimension(:), allocatable :: work
integer, dimension(:), allocatable :: ind,ninseg,iwork
- integer :: ispec,i,j,ier
+ integer :: i,j,ier
integer :: ieoff,ilocnum,nseg,ioff,iseg,ig
! dynamically allocate arrays
- allocate(ind(npointot), &
- ninseg(npointot), &
- iwork(npointot), &
- work(npointot), &
- stat=ier)
- if( ier /= 0 ) stop 'error allocating local array in get_global'
+ allocate(ind(npointot),stat=ier)
+ if (ier /= 0) stop 'error while allocating ind'
+
+ allocate(ninseg(npointot),stat=ier)
+ if (ier /= 0) stop 'error while allocating ninseg'
+
+ allocate(iwork(npointot),stat=ier)
+ if (ier /= 0) stop 'error while allocating iwork'
+
+ allocate(work(npointot),stat=ier)
+ if (ier /= 0) stop 'error while allocating work'
! establish initial pointers
- do ispec=1,nspec
- ieoff=NGLLX * NGLLY * NGLLZ * (ispec-1)
- do ilocnum=1,NGLLX * NGLLY * NGLLZ
- locval(ilocnum+ieoff)=ilocnum+ieoff
- enddo
+ do i=1,npointot
+ locval(i) = i
enddo
ifseg(:) = .false.
@@ -79,9 +81,9 @@
! sort within each segment
ioff=1
do iseg=1,nseg
- if(j == 1) then
+ if (j == 1) then
call rank(xp(ioff),ind,ninseg(iseg))
- else if(j == 2) then
+ else if (j == 2) then
call rank(yp(ioff),ind,ninseg(iseg))
else
call rank(zp(ioff),ind,ninseg(iseg))
@@ -94,24 +96,24 @@
! check for jumps in current coordinate
! compare the coordinates of the points within a small tolerance
- if(j == 1) then
+ if (j == 1) then
do i=2,npointot
- if(dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+ if (dabs(xp(i)-xp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
enddo
- else if(j == 2) then
+ else if (j == 2) then
do i=2,npointot
- if(dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+ if (dabs(yp(i)-yp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
enddo
else
do i=2,npointot
- if(dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
+ if (dabs(zp(i)-zp(i-1)) > SMALLVALTOL) ifseg(i)=.true.
enddo
endif
! count up number of different segments
nseg=0
do i=1,npointot
- if(ifseg(i)) then
+ if (ifseg(i)) then
nseg=nseg+1
ninseg(nseg)=1
else
@@ -123,7 +125,7 @@
! assign global node numbers (now sorted lexicographically)
ig=0
do i=1,npointot
- if(ifseg(i)) ig=ig+1
+ if (ifseg(i)) ig=ig+1
iglob(locval(i))=ig
enddo
@@ -132,75 +134,18 @@
! deallocate arrays
deallocate(ind,ninseg,iwork,work)
- end subroutine get_global
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-
- subroutine get_global_indirect_addressing(nspec,nglob,ibool)
-
-!
-!- we can create a new indirect addressing to reduce cache misses
-! (put into this subroutine but compiler keeps on complaining that it can't vectorize loops...)
-
- use constants
-
- implicit none
-
- integer,intent(in) :: nspec,nglob
- integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-
- ! local parameters
- ! mask to sort ibool
- integer, dimension(:), allocatable :: mask_ibool
- integer, dimension(:,:,:,:), allocatable :: copy_ibool_ori
- integer :: inumber
- integer:: i,j,k,ispec,ier
-
- ! copies original array
- allocate(copy_ibool_ori(NGLLX,NGLLY,NGLLZ,nspec), &
- mask_ibool(nglob), &
- stat=ier)
- if(ier /= 0) stop 'error allocating local arrays in get_global_indirect_addressing'
+! ------------------------------------------------------------------
- ! initializes arrays
- mask_ibool(:) = -1
- copy_ibool_ori(:,:,:,:) = ibool(:,:,:,:)
+! get_global internal procedures follow
- ! reduces misses
- inumber = 0
- do ispec=1,nspec
- do k=1,NGLLZ
- do j=1,NGLLY
- do i=1,NGLLX
- if(mask_ibool(copy_ibool_ori(i,j,k,ispec)) == -1) then
- ! creates a new point
- inumber = inumber + 1
- ibool(i,j,k,ispec) = inumber
- mask_ibool(copy_ibool_ori(i,j,k,ispec)) = inumber
- else
- ! uses an existing point created previously
- ibool(i,j,k,ispec) = mask_ibool(copy_ibool_ori(i,j,k,ispec))
- endif
- enddo
- enddo
- enddo
- enddo
-
- ! cleanup
- deallocate(copy_ibool_ori,stat=ier); if(ier /= 0) stop 'error in deallocate'
- deallocate(mask_ibool,stat=ier); if(ier /= 0) stop 'error in deallocate'
+! sorting routines put in same file to allow for inlining
- end subroutine get_global_indirect_addressing
+ contains
!
!-------------------------------------------------------------------------------------------------
!
-! sorting routines put in same file to allow for inlining
-
subroutine rank(A,IND,N)
!
! Use Heap Sort (Numerical Recipes)
@@ -224,9 +169,9 @@
L = n/2 + 1
ir = n
- do while( .true. )
+ do while (.true.)
- IF ( l > 1 ) THEN
+ IF (l > 1) THEN
l = l-1
indx = ind(l)
q = a(indx)
@@ -246,11 +191,11 @@
i = l
j = l+l
- do while( J <= IR )
- IF ( J < IR ) THEN
- IF ( A(IND(j)) < A(IND(j+1)) ) j=j+1
+ do while (J <= IR)
+ IF (J < IR) THEN
+ IF (A(IND(j)) < A(IND(j+1))) j=j+1
endif
- IF ( q < A(IND(j)) ) THEN
+ IF (q < A(IND(j))) THEN
IND(I) = IND(J)
I = J
J = J+J
@@ -304,3 +249,67 @@
end subroutine swap_all
+! ------------------------------------------------------------------
+
+ end subroutine get_global
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+ subroutine get_global_indirect_addressing(nspec,nglob,ibool)
+
+!
+!- we can create a new indirect addressing to reduce cache misses
+! (put into this subroutine but compiler keeps on complaining that it can't vectorize loops...)
+
+ use constants
+
+ implicit none
+
+ integer,intent(in) :: nspec,nglob
+ integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+ ! local parameters
+ ! mask to sort ibool
+ integer, dimension(:), allocatable :: mask_ibool
+ integer, dimension(:,:,:,:), allocatable :: copy_ibool_ori
+ integer :: inumber
+ integer:: i,j,k,ispec,ier
+
+ ! copies original array
+ allocate(copy_ibool_ori(NGLLX,NGLLY,NGLLZ,nspec), &
+ mask_ibool(nglob), &
+ stat=ier)
+ if(ier /= 0) stop 'error allocating local arrays in get_global_indirect_addressing'
+
+ ! initializes arrays
+ mask_ibool(:) = -1
+ copy_ibool_ori(:,:,:,:) = ibool(:,:,:,:)
+
+ ! reduces misses
+ inumber = 0
+ do ispec=1,nspec
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
+ if(mask_ibool(copy_ibool_ori(i,j,k,ispec)) == -1) then
+ ! creates a new point
+ inumber = inumber + 1
+ ibool(i,j,k,ispec) = inumber
+ mask_ibool(copy_ibool_ori(i,j,k,ispec)) = inumber
+ else
+ ! uses an existing point created previously
+ ibool(i,j,k,ispec) = mask_ibool(copy_ibool_ori(i,j,k,ispec))
+ endif
+ enddo
+ enddo
+ enddo
+ enddo
+
+ ! cleanup
+ deallocate(copy_ibool_ori,stat=ier); if(ier /= 0) stop 'error in deallocate'
+ deallocate(mask_ibool,stat=ier); if(ier /= 0) stop 'error in deallocate'
+
+ end subroutine get_global_indirect_addressing
+
diff --git a/src/shared/rules.mk b/src/shared/rules.mk
index 97738d9..2f89a97 100644
--- a/src/shared/rules.mk
+++ b/src/shared/rules.mk
@@ -45,6 +45,7 @@ shared_OBJECTS = \
$O/euler_angles.shared.o \
$O/exit_mpi.shared.o \
$O/force_ftz.cc.o \
+ $O/get_global.shared.o \
$O/get_model_parameters.shared.o \
$O/get_timestep_and_layers.shared.o \
$O/get_value_parameters.shared.o \
More information about the CIG-COMMITS
mailing list