[cig-commits] r22632 - in seismo/3D/SPECFEM3D_GLOBE/trunk: . src/specfem3D
dkomati1 at geodynamics.org
dkomati1 at geodynamics.org
Tue Jul 16 16:02:48 PDT 2013
Author: dkomati1
Date: 2013-07-16 16:02:48 -0700 (Tue, 16 Jul 2013)
New Revision: 22632
Modified:
seismo/3D/SPECFEM3D_GLOBE/trunk/flags.guess
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90
seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/rules.mk
Log:
vectorized many loops in compute_forces(); but not done with all the loops yet
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/flags.guess
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/flags.guess 2013-07-16 22:17:12 UTC (rev 22631)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/flags.guess 2013-07-16 23:02:48 UTC (rev 22632)
@@ -30,7 +30,7 @@
# parallel file systems like SFS 3.2 / Lustre 1.8. If omitted
# I/O throughput lingers at 2.5 MB/s, with it it can increase to ~44 MB/s
# However it does not make much of a difference on NFS mounted volumes or with SFS 3.1.1 / Lustre 1.6.7.1
- DEF_FFLAGS="-O3 -DFORCE_VECTORIZATION -check nobounds -xHost -ftz -assume buffered_io -assume byterecl -align sequence -vec-report0 -std03 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage" # -mcmodel=medium -shared-intel
+ DEF_FFLAGS="-O3 -DFORCE_VECTORIZATION -check nobounds -xHost -ftz -assume buffered_io -assume byterecl -align sequence -vec-report0 -std03 -diag-disable 6477 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage" # -mcmodel=medium -shared-intel
# useful for debugging...
# for debugging: change -O3 -DFORCE_VECTORIZATION -check nobounds to -check all -debug -g -O0 -fp-stack-check -traceback -ftrapuv
#
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-07-16 22:17:12 UTC (rev 22631)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90 2013-07-16 23:02:48 UTC (rev 22632)
@@ -512,10 +512,30 @@
! add gravity terms
if(GRAVITY_VAL) then
+#ifdef FORCE_VECTORIZATION
+ do ijk = 1,NDIM*NGLLCUBE
+ sum_terms(ijk,1,1,1) = sum_terms(ijk,1,1,1) + rho_s_H(ijk,1,1,1)
+ enddo
+#else
sum_terms(:,:,:,:) = sum_terms(:,:,:,:) + rho_s_H(:,:,:,:)
+#endif
endif
! sum contributions from each element to the global mesh
+#ifdef FORCE_VECTORIZATION
+! we can force vectorization using a compiler directive here because we know that there is no dependency
+! inside a given spectral element, since all the global points of a local elements are different by definition
+! (only common points between different elements can be the same)
+!IBM* ASSERT (NODEPS)
+!DIR$ IVDEP
+ do ijk = 1,NGLLCUBE
+ iglob = ibool(ijk,1,1,ispec)
+! do NOT use array syntax ":" for the three statements below otherwise most compilers will not be able to vectorize the outer loop
+ accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,ijk,1,1)
+ accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,ijk,1,1)
+ accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,ijk,1,1)
+ enddo
+#else
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -527,6 +547,7 @@
enddo
enddo
enddo
+#endif
! update memory variables based upon the Runge-Kutta scheme
! convention for attenuation
@@ -552,7 +573,13 @@
epsilondev_loc,epsilondev(1,1,1,1,ispec),&
istage,R_memory_lddrk,tau_sigma_CUSTOM_REAL,deltat,USE_LDDRK)
+#ifdef FORCE_VECTORIZATION
+ do ijk = 1,5*NGLLCUBE
+ epsilondev(ijk,1,1,1,ispec) = epsilondev_loc(ijk,1,1,1)
+ enddo
+#else
epsilondev(:,:,:,:,ispec) = epsilondev_loc(:,:,:,:)
+#endif
endif
enddo ! end of ispec loop
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-07-16 22:17:12 UTC (rev 22631)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90 2013-07-16 23:02:48 UTC (rev 22632)
@@ -720,10 +720,30 @@
! add gravity terms
if(GRAVITY_VAL) then
+#ifdef FORCE_VECTORIZATION
+ do ijk = 1,NDIM*NGLLCUBE
+ sum_terms(ijk,1,1,1) = sum_terms(ijk,1,1,1) + rho_s_H(ijk,1,1,1)
+ enddo
+#else
sum_terms(:,:,:,:) = sum_terms(:,:,:,:) + rho_s_H(:,:,:,:)
+#endif
endif
! sum contributions from each element to the global mesh
+#ifdef FORCE_VECTORIZATION
+! we can force vectorization using a compiler directive here because we know that there is no dependency
+! inside a given spectral element, since all the global points of a local elements are different by definition
+! (only common points between different elements can be the same)
+!IBM* ASSERT (NODEPS)
+!DIR$ IVDEP
+ do ijk = 1,NGLLCUBE
+ iglob = ibool(ijk,1,1,ispec)
+! do NOT use array syntax ":" for the three statements below otherwise most compilers will not be able to vectorize the outer loop
+ accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(1,ijk,1,1)
+ accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(2,ijk,1,1)
+ accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(3,ijk,1,1)
+ enddo
+#else
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -735,6 +755,7 @@
enddo
enddo
enddo
+#endif
! use Runge-Kutta scheme to march memory variables in time
! convention for attenuation
@@ -759,7 +780,13 @@
epsilondev_loc,epsilondev(1,1,1,1,ispec),&
istage,R_memory_lddrk,tau_sigma_CUSTOM_REAL,deltat,USE_LDDRK)
+#ifdef FORCE_VECTORIZATION
+ do ijk = 1,5*NGLLCUBE
+ epsilondev(ijk,1,1,1,ispec) = epsilondev_loc(ijk,1,1,1)
+ enddo
+#else
epsilondev(:,:,:,:,ispec) = epsilondev_loc(:,:,:,:)
+#endif
endif
endif ! end of test to exclude fictitious elements in central cube
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90 2013-07-16 22:17:12 UTC (rev 22631)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90 2013-07-16 23:02:48 UTC (rev 22632)
@@ -175,6 +175,10 @@
real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
A_array_rotation_lddrk,B_array_rotation_lddrk
+#ifdef FORCE_VECTORIZATION
+ integer :: ijk
+#endif
+
! ****************************************************
! big loop over all spectral elements in the fluid
! ****************************************************
@@ -208,6 +212,44 @@
NGLOB2DMAX_XMIN_XMAX_OC,NGLOB2DMAX_YMIN_YMAX_OC, &
NGLOB2DMAX_XY_OC_VAL,NCHUNKS_VAL,iphase)
+#ifdef FORCE_VECTORIZATION
+ do ijk=1,NGLLCUBE
+ iglob = ibool(ijk,1,1,ispec)
+
+ ! get a local copy of the potential field
+ dummyx_loc(ijk,1,1) = displfluid(iglob)
+
+ ! pre-computes factors
+ ! use mesh coordinates to get theta and phi
+ ! x y z contain r theta phi
+ radius = dble(xstore(iglob))
+ theta = dble(ystore(iglob))
+ phi = dble(zstore(iglob))
+
+ cos_theta = dcos(theta)
+ sin_theta = dsin(theta)
+ cos_phi = dcos(phi)
+ sin_phi = dsin(phi)
+
+ int_radius = nint(radius * R_EARTH_KM * 10.d0)
+
+ if( .not. GRAVITY_VAL ) then
+ ! grad(rho)/rho in Cartesian components
+ displ_times_grad_x_ln_rho(ijk,1,1) = dummyx_loc(ijk,1,1) &
+ * sin_theta * cos_phi * d_ln_density_dr_table(int_radius)
+ displ_times_grad_y_ln_rho(ijk,1,1) = dummyx_loc(ijk,1,1) &
+ * sin_theta * sin_phi * d_ln_density_dr_table(int_radius)
+ displ_times_grad_z_ln_rho(ijk,1,1) = dummyx_loc(ijk,1,1) &
+ * cos_theta * d_ln_density_dr_table(int_radius)
+ else
+ ! Cartesian components of the gravitational acceleration
+ ! integrate and multiply by rho / Kappa
+ temp_gxl(ijk,1,1) = sin_theta*cos_phi
+ temp_gyl(ijk,1,1) = sin_theta*sin_phi
+ temp_gzl(ijk,1,1) = cos_theta
+ endif
+ enddo
+#else
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -245,10 +287,10 @@
temp_gyl(i,j,k) = sin_theta*sin_phi
temp_gzl(i,j,k) = cos_theta
endif
-
enddo
enddo
enddo
+#endif
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
@@ -469,9 +511,26 @@
! add gravity term
if(GRAVITY_VAL) then
+#ifdef FORCE_VECTORIZATION
+ do ijk = 1,NGLLCUBE
+ sum_terms(ijk,1,1) = sum_terms(ijk,1,1) + gravity_term(ijk,1,1)
+ enddo
+#else
sum_terms(:,:,:) = sum_terms(:,:,:) + gravity_term(:,:,:)
+#endif
endif
+#ifdef FORCE_VECTORIZATION
+! we can force vectorization using a compiler directive here because we know that there is no dependency
+! inside a given spectral element, since all the global points of a local elements are different by definition
+! (only common points between different elements can be the same)
+!IBM* ASSERT (NODEPS)
+!DIR$ IVDEP
+ do ijk = 1,NGLLCUBE
+ iglob = ibool(ijk,1,1,ispec)
+ accelfluid(iglob) = accelfluid(iglob) + sum_terms(ijk,1,1)
+ enddo
+#else
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -480,6 +539,7 @@
enddo
enddo
enddo
+#endif
! update rotation term with Euler scheme
if(ROTATION_VAL) then
@@ -492,8 +552,15 @@
B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + BETA_LDDRK(istage) * B_array_rotation_lddrk(:,:,:,ispec)
else
! use the source saved above
+#ifdef FORCE_VECTORIZATION
+ do ijk = 1,NGLLCUBE
+ A_array_rotation(ijk,1,1,ispec) = A_array_rotation(ijk,1,1,ispec) + source_euler_A(ijk,1,1)
+ B_array_rotation(ijk,1,1,ispec) = B_array_rotation(ijk,1,1,ispec) + source_euler_B(ijk,1,1)
+ enddo
+#else
A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + source_euler_A(:,:,:)
B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + source_euler_B(:,:,:)
+#endif
endif
endif
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/rules.mk
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/rules.mk 2013-07-16 22:17:12 UTC (rev 22631)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/rules.mk 2013-07-16 23:02:48 UTC (rev 22632)
@@ -174,8 +174,8 @@
$O/compute_forces_outer_core_noDev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_outer_core_noDev.f90
${FCCOMPILE_CHECK} -c -o $O/compute_forces_outer_core_noDev.o ${FCFLAGS_f90} $S/compute_forces_outer_core_noDev.f90
-$O/compute_forces_outer_core_Dev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_outer_core_Dev.f90
- ${FCCOMPILE_CHECK} -c -o $O/compute_forces_outer_core_Dev.o ${FCFLAGS_f90} $S/compute_forces_outer_core_Dev.f90
+$O/compute_forces_outer_core_Dev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_outer_core_Dev.F90
+ ${FCCOMPILE_CHECK} -c -o $O/compute_forces_outer_core_Dev.o ${FCFLAGS_f90} $S/compute_forces_outer_core_Dev.F90
$O/compute_forces_inner_core_noDev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_inner_core_noDev.f90
${FCCOMPILE_CHECK} -c -o $O/compute_forces_inner_core_noDev.o ${FCFLAGS_f90} $S/compute_forces_inner_core_noDev.f90
More information about the CIG-COMMITS
mailing list