[cig-commits] r22629 - seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D
dkomati1 at geodynamics.org
dkomati1 at geodynamics.org
Tue Jul 16 14:55:23 PDT 2013
Author: dkomati1
Date: 2013-07-16 14:55:23 -0700 (Tue, 16 Jul 2013)
New Revision: 22629
Modified:
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.f90
Log:
cleaned compute_forces() routines in order to be able to vectorize them later
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-07-16 17:18:13 UTC (rev 22628)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-07-16 21:55:23 UTC (rev 22629)
@@ -167,7 +167,7 @@
! for gravity
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
- integer :: ispec,i,j,k,iglob1
+ integer :: ispec,i,j,k,iglob
! this for non blocking MPI
integer :: iphase,icall
@@ -278,35 +278,35 @@
iend = min(ispec_glob+ELEMENTS_NONBLOCKING_CM_IC-1,NSPEC_CRUST_MANTLE)
- do ispec=ispec_glob,iend
+ do ispec = ispec_glob,iend
! hide communications by computing the edges first
if((icall == 2 .and. is_on_a_slice_edge_crust_mantle(ispec)) .or. &
(icall == 1 .and. .not. is_on_a_slice_edge_crust_mantle(ispec))) cycle
- ! subroutines adapted from Deville, Fischer and Mund, High-order methods
- ! for incompressible fluid flow, Cambridge University Press (2002),
- ! pages 386 and 389 and Figure 8.3.1
#ifdef FORCE_VECTORIZATION
do ijk=1,NGLLCUBE
- iglob1 = ibool(ijk,1,1,ispec)
- dummyx_loc(ijk,1,1) = displ_crust_mantle(1,iglob1)
- dummyy_loc(ijk,1,1) = displ_crust_mantle(2,iglob1)
- dummyz_loc(ijk,1,1) = displ_crust_mantle(3,iglob1)
+ iglob = ibool(ijk,1,1,ispec)
+ dummyx_loc(ijk,1,1) = displ_crust_mantle(1,iglob)
+ dummyy_loc(ijk,1,1) = displ_crust_mantle(2,iglob)
+ dummyz_loc(ijk,1,1) = displ_crust_mantle(3,iglob)
enddo
#else
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- iglob1 = ibool(i,j,k,ispec)
- dummyx_loc(i,j,k) = displ_crust_mantle(1,iglob1)
- dummyy_loc(i,j,k) = displ_crust_mantle(2,iglob1)
- dummyz_loc(i,j,k) = displ_crust_mantle(3,iglob1)
+ iglob = ibool(i,j,k,ispec)
+ dummyx_loc(i,j,k) = displ_crust_mantle(1,iglob)
+ dummyy_loc(i,j,k) = displ_crust_mantle(2,iglob)
+ dummyz_loc(i,j,k) = displ_crust_mantle(3,iglob)
enddo
enddo
enddo
#endif
+ ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+ ! for incompressible fluid flow, Cambridge University Press (2002),
+ ! pages 386 and 389 and Figure 8.3.1
do j=1,m2
do i=1,m1
C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
@@ -498,30 +498,34 @@
enddo
enddo
+ ! sum contributions
do k=1,NGLLZ
do j=1,NGLLY
fac1 = wgllwgll_yz(j,k)
do i=1,NGLLX
fac2 = wgllwgll_xz(i,k)
fac3 = wgllwgll_xy(i,j)
-
- ! sum contributions
sum_terms(1,i,j,k) = - (fac1*newtempx1(i,j,k) + fac2*newtempx2(i,j,k) + fac3*newtempx3(i,j,k))
sum_terms(2,i,j,k) = - (fac1*newtempy1(i,j,k) + fac2*newtempy2(i,j,k) + fac3*newtempy3(i,j,k))
sum_terms(3,i,j,k) = - (fac1*newtempz1(i,j,k) + fac2*newtempz2(i,j,k) + fac3*newtempz3(i,j,k))
+ enddo
+ enddo
+ enddo
- if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
+ ! add gravity terms
+ if(GRAVITY_VAL) then
+ sum_terms(:,:,:,:) = sum_terms(:,:,:,:) + rho_s_H(:,:,:,:)
+ endif
- enddo ! NGLLX
- enddo ! NGLLY
- enddo ! NGLLZ
-
- ! sum contributions from each element to the global mesh and add gravity terms
+ ! sum contributions from each element to the global mesh
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- iglob1 = ibool(i,j,k,ispec)
- accel_crust_mantle(:,iglob1) = accel_crust_mantle(:,iglob1) + sum_terms(:,i,j,k)
+ iglob = ibool(i,j,k,ispec)
+! do NOT use array syntax ":" for the three statements below otherwise most compilers will not be able to vectorize the outer loop
+ accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,i,j,k)
+ accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,i,j,k)
+ accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,i,j,k)
enddo
enddo
enddo
@@ -564,10 +568,9 @@
epsilondev(5,:,:,:,ispec) = epsilondev_loc(5,:,:,:)
endif
-! end ispec loop
- enddo
+ enddo ! end of ispec loop
- enddo ! spectral element loop NSPEC_CRUST_MANTLE
+ enddo ! of spectral element loop NSPEC_CRUST_MANTLE
end subroutine compute_forces_crust_mantle_Dev
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-07-16 17:18:13 UTC (rev 22628)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-07-16 21:55:23 UTC (rev 22629)
@@ -178,7 +178,7 @@
integer :: int_radius
integer :: ispec
integer :: i,j,k
- integer :: iglob1
+ integer :: iglob
! this for non blocking MPI
integer :: iphase,icall
@@ -301,29 +301,29 @@
endif
- ! subroutines adapted from Deville, Fischer and Mund, High-order methods
- ! for incompressible fluid flow, Cambridge University Press (2002),
- ! pages 386 and 389 and Figure 8.3.1
#ifdef FORCE_VECTORIZATION
do ijk=1,NGLLCUBE
- iglob1 = ibool(ijk,1,1,ispec)
- dummyx_loc(ijk,1,1) = displ_inner_core(1,iglob1)
- dummyy_loc(ijk,1,1) = displ_inner_core(2,iglob1)
- dummyz_loc(ijk,1,1) = displ_inner_core(3,iglob1)
+ iglob = ibool(ijk,1,1,ispec)
+ dummyx_loc(ijk,1,1) = displ_inner_core(1,iglob)
+ dummyy_loc(ijk,1,1) = displ_inner_core(2,iglob)
+ dummyz_loc(ijk,1,1) = displ_inner_core(3,iglob)
enddo
#else
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- iglob1 = ibool(i,j,k,ispec)
- dummyx_loc(i,j,k) = displ_inner_core(1,iglob1)
- dummyy_loc(i,j,k) = displ_inner_core(2,iglob1)
- dummyz_loc(i,j,k) = displ_inner_core(3,iglob1)
+ iglob = ibool(i,j,k,ispec)
+ dummyx_loc(i,j,k) = displ_inner_core(1,iglob)
+ dummyy_loc(i,j,k) = displ_inner_core(2,iglob)
+ dummyz_loc(i,j,k) = displ_inner_core(3,iglob)
enddo
enddo
enddo
#endif
+ ! subroutines adapted from Deville, Fischer and Mund, High-order methods
+ ! for incompressible fluid flow, Cambridge University Press (2002),
+ ! pages 386 and 389 and Figure 8.3.1
do j=1,m2
do i=1,m1
C1_m1_m2_5points(i,j) = hprime_xx(i,1)*B1_m1_m2_5points(1,j) + &
@@ -541,10 +541,10 @@
! use mesh coordinates to get theta and phi
! x y and z contain r theta and phi
- iglob1 = ibool(i,j,k,ispec)
- radius = dble(xstore(iglob1))
- theta = dble(ystore(iglob1))
- phi = dble(zstore(iglob1))
+ iglob = ibool(i,j,k,ispec)
+ radius = dble(xstore(iglob))
+ theta = dble(ystore(iglob))
+ phi = dble(zstore(iglob))
! make sure radius is never zero even for points at center of cube
! because we later divide by radius
@@ -587,64 +587,33 @@
Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
! for locality principle, we set iglob again, in order to have it in the cache again
- iglob1 = ibool(i,j,k,ispec)
+ iglob = ibool(i,j,k,ispec)
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * dble(displ_inner_core(1,iglob1))
- sy_l = rho * dble(displ_inner_core(2,iglob1))
- sz_l = rho * dble(displ_inner_core(3,iglob1))
+ ! get displacement and multiply by density to compute G tensor
+ sx_l = rho * displ_inner_core(1,iglob)
+ sy_l = rho * displ_inner_core(2,iglob)
+ sz_l = rho * displ_inner_core(3,iglob)
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
- sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
- sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
+ ! compute G tensor from s . g and add to sigma (not symmetric)
+ sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+ sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+ sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
- sigma_xy = sigma_xy - sngl(sx_l * gyl)
- sigma_yx = sigma_yx - sngl(sy_l * gxl)
+ sigma_xy = sigma_xy - sx_l * gyl
+ sigma_yx = sigma_yx - sy_l * gxl
- sigma_xz = sigma_xz - sngl(sx_l * gzl)
- sigma_zx = sigma_zx - sngl(sz_l * gxl)
+ sigma_xz = sigma_xz - sx_l * gzl
+ sigma_zx = sigma_zx - sz_l * gxl
- sigma_yz = sigma_yz - sngl(sy_l * gzl)
- sigma_zy = sigma_zy - sngl(sz_l * gyl)
+ sigma_yz = sigma_yz - sy_l * gzl
+ sigma_zy = sigma_zy - sz_l * gyl
- ! precompute vector
- factor = dble(jacobianl) * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
- rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
- rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+ ! precompute vector
+ factor = jacobianl * wgll_cube(i,j,k)
+ rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+ rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+ rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
- else
-
- ! get displacement and multiply by density to compute G tensor
- sx_l = rho * displ_inner_core(1,iglob1)
- sy_l = rho * displ_inner_core(2,iglob1)
- sz_l = rho * displ_inner_core(3,iglob1)
-
- ! compute G tensor from s . g and add to sigma (not symmetric)
- sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
- sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
- sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
-
- sigma_xy = sigma_xy - sx_l * gyl
- sigma_yx = sigma_yx - sy_l * gxl
-
- sigma_xz = sigma_xz - sx_l * gzl
- sigma_zx = sigma_zx - sz_l * gxl
-
- sigma_yz = sigma_yz - sy_l * gzl
- sigma_zy = sigma_zy - sz_l * gyl
-
- ! precompute vector
- factor = jacobianl * wgll_cube(i,j,k)
- rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
- rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
- rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
-
- endif
-
endif ! end of section with gravity terms
! form dot product with test vector, non-symmetric form
@@ -736,30 +705,34 @@
enddo
enddo
+ ! sum contributions
do k=1,NGLLZ
do j=1,NGLLY
fac1 = wgllwgll_yz(j,k)
do i=1,NGLLX
fac2 = wgllwgll_xz(i,k)
fac3 = wgllwgll_xy(i,j)
-
- ! sum contributions
sum_terms(1,i,j,k) = - (fac1*newtempx1(i,j,k) + fac2*newtempx2(i,j,k) + fac3*newtempx3(i,j,k))
sum_terms(2,i,j,k) = - (fac1*newtempy1(i,j,k) + fac2*newtempy2(i,j,k) + fac3*newtempy3(i,j,k))
sum_terms(3,i,j,k) = - (fac1*newtempz1(i,j,k) + fac2*newtempz2(i,j,k) + fac3*newtempz3(i,j,k))
-
- if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
-
enddo
enddo
enddo
- ! sum contributions from each element to the global mesh and add gravity terms
+ ! add gravity terms
+ if(GRAVITY_VAL) then
+ sum_terms(:,:,:,:) = sum_terms(:,:,:,:) + rho_s_H(:,:,:,:)
+ endif
+
+ ! sum contributions from each element to the global mesh
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
- iglob1 = ibool(i,j,k,ispec)
- accel_inner_core(:,iglob1) = accel_inner_core(:,iglob1) + sum_terms(:,i,j,k)
+ iglob = ibool(i,j,k,ispec)
+! do NOT use array syntax ":" for the three statements below otherwise most compilers will not be able to vectorize the outer loop
+ accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(1,i,j,k)
+ accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(2,i,j,k)
+ accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(3,i,j,k)
enddo
enddo
enddo
@@ -802,9 +775,9 @@
endif
- endif ! end test to exclude fictitious elements in central cube
+ endif ! end of test to exclude fictitious elements in central cube
- enddo ! spectral element loop
+ enddo ! of spectral element loop
end subroutine compute_forces_inner_core_Dev
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.f90 2013-07-16 17:18:13 UTC (rev 22628)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.f90 2013-07-16 21:55:23 UTC (rev 22629)
@@ -106,7 +106,7 @@
integer :: i,j,k
real(kind=CUSTOM_REAL) :: xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
real(kind=CUSTOM_REAL) :: dpotentialdxl,dpotentialdyl,dpotentialdzl
- real(kind=CUSTOM_REAL) :: sum_terms
+ real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: sum_terms
! manually inline the calls to the Deville et al. (2002) routines
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: dummyx_loc
@@ -213,7 +213,7 @@
do i=1,NGLLX
iglob = ibool(i,j,k,ispec)
- ! stores "displacement"
+ ! get a local copy of the potential field
dummyx_loc(i,j,k) = displfluid(iglob)
! pre-computes factors
@@ -233,11 +233,11 @@
if( .not. GRAVITY_VAL ) then
! grad(rho)/rho in Cartesian components
displ_times_grad_x_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
- * sngl(sin_theta * cos_phi * d_ln_density_dr_table(int_radius))
+ * sin_theta * cos_phi * d_ln_density_dr_table(int_radius)
displ_times_grad_y_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
- * sngl(sin_theta * sin_phi * d_ln_density_dr_table(int_radius))
+ * sin_theta * sin_phi * d_ln_density_dr_table(int_radius)
displ_times_grad_z_ln_rho(i,j,k) = dummyx_loc(i,j,k) &
- * sngl(cos_theta * d_ln_density_dr_table(int_radius))
+ * cos_theta * d_ln_density_dr_table(int_radius)
else
! Cartesian components of the gravitational acceleration
! integrate and multiply by rho / Kappa
@@ -388,35 +388,25 @@
gyl = temp_gyl(i,j,k)
gzl = temp_gzl(i,j,k)
- ! distinguish between single and double precision for reals
- if(CUSTOM_REAL == SIZE_REAL) then
- gravity_term(i,j,k) = &
- sngl( minus_rho_g_over_kappa_fluid(int_radius) &
- * dble(jacobianl) * wgll_cube(i,j,k) &
- * (dble(dpotentialdx_with_rot) * gxl &
- + dble(dpotentialdy_with_rot) * gyl &
- + dble(dpotentialdzl) * gzl) )
- else
- gravity_term(i,j,k) = minus_rho_g_over_kappa_fluid(int_radius) * &
+ gravity_term(i,j,k) = minus_rho_g_over_kappa_fluid(int_radius) * &
jacobianl * wgll_cube(i,j,k) &
* (dpotentialdx_with_rot * gxl &
+ dpotentialdy_with_rot * gyl &
+ dpotentialdzl * gzl)
- endif
if(istage == 1)then
! divergence of displacement field with gravity on
! note: these calculations are only considered for SIMULATION_TYPE == 1 .and. SAVE_FORWARD
! and one has set MOVIE_VOLUME_TYPE == 4 when MOVIE_VOLUME is .true.;
! in case of SIMULATION_TYPE == 3, it gets overwritten by compute_kernels_outer_core()
- if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. SIMULATION_TYPE == 1 .and. MOVIE_VOLUME) then
- div_displfluid(i,j,k,ispec) = &
+ if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. SIMULATION_TYPE == 1 .and. MOVIE_VOLUME) then
+ div_displfluid(i,j,k,ispec) = &
minus_rho_g_over_kappa_fluid(int_radius) &
* (dpotentialdx_with_rot * gxl &
+ dpotentialdy_with_rot * gyl &
+ dpotentialdzl * gzl)
+ endif
endif
- endif
endif
@@ -466,27 +456,34 @@
enddo
enddo
+ ! sum contributions from each element to the global mesh
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
+ sum_terms(i,j,k) = - (wgllwgll_yz(j,k)*newtempx1(i,j,k) &
+ + wgllwgll_xz(i,k)*newtempx2(i,j,k) &
+ + wgllwgll_xy(i,j)*newtempx3(i,j,k))
+ enddo
+ enddo
+ enddo
- ! sum contributions from each element to the global mesh and add gravity term
- sum_terms = - (wgllwgll_yz(j,k)*newtempx1(i,j,k) &
- + wgllwgll_xz(i,k)*newtempx2(i,j,k) &
- + wgllwgll_xy(i,j)*newtempx3(i,j,k))
+ ! add gravity term
+ if(GRAVITY_VAL) then
+ sum_terms(:,:,:) = sum_terms(:,:,:) + gravity_term(:,:,:)
+ endif
- if(GRAVITY_VAL) sum_terms = sum_terms + gravity_term(i,j,k)
-
+ do k=1,NGLLZ
+ do j=1,NGLLY
+ do i=1,NGLLX
iglob = ibool(i,j,k,ispec)
- accelfluid(iglob) = accelfluid(iglob) + sum_terms
-
+ accelfluid(iglob) = accelfluid(iglob) + sum_terms(i,j,k)
enddo
enddo
enddo
! update rotation term with Euler scheme
if(ROTATION_VAL) then
- if(USE_LDDRK)then
+ if(USE_LDDRK) then
! use the source saved above
A_array_rotation_lddrk(:,:,:,ispec) = ALPHA_LDDRK(istage) * A_array_rotation_lddrk(:,:,:,ispec) + source_euler_A(:,:,:)
A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + BETA_LDDRK(istage) * A_array_rotation_lddrk(:,:,:,ispec)
@@ -500,7 +497,7 @@
endif
endif
- enddo ! spectral element loop
+ enddo ! of spectral element loop
end subroutine compute_forces_outer_core_Dev
More information about the CIG-COMMITS
mailing list