[cig-commits] r22632 - in seismo/3D/SPECFEM3D_GLOBE/trunk: . src/specfem3D

dkomati1 at geodynamics.org dkomati1 at geodynamics.org
Tue Jul 16 16:02:48 PDT 2013


Author: dkomati1
Date: 2013-07-16 16:02:48 -0700 (Tue, 16 Jul 2013)
New Revision: 22632

Modified:
   seismo/3D/SPECFEM3D_GLOBE/trunk/flags.guess
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/rules.mk
Log:
vectorized many loops in compute_forces(); but not done with all the loops yet


Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/flags.guess
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/flags.guess	2013-07-16 22:17:12 UTC (rev 22631)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/flags.guess	2013-07-16 23:02:48 UTC (rev 22632)
@@ -30,7 +30,7 @@
 # parallel file systems like SFS 3.2 / Lustre 1.8. If omitted
 # I/O throughput lingers at 2.5 MB/s, with it it can increase to ~44 MB/s
 # However it does not make much of a difference on NFS mounted volumes or with SFS 3.1.1 / Lustre 1.6.7.1 
-        DEF_FFLAGS="-O3 -DFORCE_VECTORIZATION -check nobounds -xHost -ftz -assume buffered_io -assume byterecl -align sequence -vec-report0 -std03 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage" # -mcmodel=medium -shared-intel
+        DEF_FFLAGS="-O3 -DFORCE_VECTORIZATION -check nobounds -xHost -ftz -assume buffered_io -assume byterecl -align sequence -vec-report0 -std03 -diag-disable 6477 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -warn alignments -warn ignore_loc -warn usage" # -mcmodel=medium -shared-intel
         # useful for debugging...
         # for debugging: change -O3 -DFORCE_VECTORIZATION -check nobounds to      -check all -debug -g -O0 -fp-stack-check -traceback -ftrapuv
         #

Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90	2013-07-16 22:17:12 UTC (rev 22631)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle_Dev.F90	2013-07-16 23:02:48 UTC (rev 22632)
@@ -512,10 +512,30 @@
 
     ! add gravity terms
     if(GRAVITY_VAL) then
+#ifdef FORCE_VECTORIZATION
+      do ijk = 1,NDIM*NGLLCUBE
+        sum_terms(ijk,1,1,1) = sum_terms(ijk,1,1,1) + rho_s_H(ijk,1,1,1)
+      enddo
+#else
       sum_terms(:,:,:,:) = sum_terms(:,:,:,:) + rho_s_H(:,:,:,:)
+#endif
     endif
 
     ! sum contributions from each element to the global mesh
+#ifdef FORCE_VECTORIZATION
+! we can force vectorization using a compiler directive here because we know that there is no dependency
+! inside a given spectral element, since all the global points of a local elements are different by definition
+! (only common points between different elements can be the same)
+!IBM* ASSERT (NODEPS)
+!DIR$ IVDEP
+        do ijk = 1,NGLLCUBE
+          iglob = ibool(ijk,1,1,ispec)
+! do NOT use array syntax ":" for the three statements below otherwise most compilers will not be able to vectorize the outer loop
+          accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,ijk,1,1)
+          accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,ijk,1,1)
+          accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,ijk,1,1)
+        enddo
+#else
     do k=1,NGLLZ
       do j=1,NGLLY
         do i=1,NGLLX
@@ -527,6 +547,7 @@
         enddo
       enddo
     enddo
+#endif
 
     ! update memory variables based upon the Runge-Kutta scheme
     ! convention for attenuation
@@ -552,7 +573,13 @@
                                          epsilondev_loc,epsilondev(1,1,1,1,ispec),&
                                          istage,R_memory_lddrk,tau_sigma_CUSTOM_REAL,deltat,USE_LDDRK)
 
+#ifdef FORCE_VECTORIZATION
+      do ijk = 1,5*NGLLCUBE
+        epsilondev(ijk,1,1,1,ispec) = epsilondev_loc(ijk,1,1,1)
+      enddo
+#else
       epsilondev(:,:,:,:,ispec) = epsilondev_loc(:,:,:,:)
+#endif
     endif
 
    enddo ! end of ispec loop

Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90	2013-07-16 22:17:12 UTC (rev 22631)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core_Dev.F90	2013-07-16 23:02:48 UTC (rev 22632)
@@ -720,10 +720,30 @@
 
       ! add gravity terms
       if(GRAVITY_VAL) then
+#ifdef FORCE_VECTORIZATION
+        do ijk = 1,NDIM*NGLLCUBE
+          sum_terms(ijk,1,1,1) = sum_terms(ijk,1,1,1) + rho_s_H(ijk,1,1,1)
+        enddo
+#else
         sum_terms(:,:,:,:) = sum_terms(:,:,:,:) + rho_s_H(:,:,:,:)
+#endif
       endif
 
       ! sum contributions from each element to the global mesh
+#ifdef FORCE_VECTORIZATION
+! we can force vectorization using a compiler directive here because we know that there is no dependency
+! inside a given spectral element, since all the global points of a local elements are different by definition
+! (only common points between different elements can be the same)
+!IBM* ASSERT (NODEPS)
+!DIR$ IVDEP
+          do ijk = 1,NGLLCUBE
+            iglob = ibool(ijk,1,1,ispec)
+! do NOT use array syntax ":" for the three statements below otherwise most compilers will not be able to vectorize the outer loop
+            accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(1,ijk,1,1)
+            accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(2,ijk,1,1)
+            accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(3,ijk,1,1)
+          enddo
+#else
       do k=1,NGLLZ
         do j=1,NGLLY
           do i=1,NGLLX
@@ -735,6 +755,7 @@
           enddo
         enddo
       enddo
+#endif
 
       ! use Runge-Kutta scheme to march memory variables in time
       ! convention for attenuation
@@ -759,7 +780,13 @@
                                       epsilondev_loc,epsilondev(1,1,1,1,ispec),&
                                       istage,R_memory_lddrk,tau_sigma_CUSTOM_REAL,deltat,USE_LDDRK)
 
+#ifdef FORCE_VECTORIZATION
+        do ijk = 1,5*NGLLCUBE
+          epsilondev(ijk,1,1,1,ispec) = epsilondev_loc(ijk,1,1,1)
+        enddo
+#else
         epsilondev(:,:,:,:,ispec) = epsilondev_loc(:,:,:,:)
+#endif
       endif
 
     endif ! end of test to exclude fictitious elements in central cube

Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90	2013-07-16 22:17:12 UTC (rev 22631)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core_Dev.F90	2013-07-16 23:02:48 UTC (rev 22632)
@@ -175,6 +175,10 @@
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
             A_array_rotation_lddrk,B_array_rotation_lddrk
 
+#ifdef FORCE_VECTORIZATION
+  integer :: ijk
+#endif
+
 ! ****************************************************
 !   big loop over all spectral elements in the fluid
 ! ****************************************************
@@ -208,6 +212,44 @@
             NGLOB2DMAX_XMIN_XMAX_OC,NGLOB2DMAX_YMIN_YMAX_OC, &
             NGLOB2DMAX_XY_OC_VAL,NCHUNKS_VAL,iphase)
 
+#ifdef FORCE_VECTORIZATION
+        do ijk=1,NGLLCUBE
+          iglob = ibool(ijk,1,1,ispec)
+
+          ! get a local copy of the potential field
+          dummyx_loc(ijk,1,1) = displfluid(iglob)
+
+          ! pre-computes factors
+          ! use mesh coordinates to get theta and phi
+          ! x y z contain r theta phi
+          radius = dble(xstore(iglob))
+          theta = dble(ystore(iglob))
+          phi = dble(zstore(iglob))
+
+          cos_theta = dcos(theta)
+          sin_theta = dsin(theta)
+          cos_phi = dcos(phi)
+          sin_phi = dsin(phi)
+
+          int_radius = nint(radius * R_EARTH_KM * 10.d0)
+
+          if( .not. GRAVITY_VAL ) then
+            ! grad(rho)/rho in Cartesian components
+            displ_times_grad_x_ln_rho(ijk,1,1) = dummyx_loc(ijk,1,1) &
+                  * sin_theta * cos_phi * d_ln_density_dr_table(int_radius)
+            displ_times_grad_y_ln_rho(ijk,1,1) = dummyx_loc(ijk,1,1) &
+                  * sin_theta * sin_phi * d_ln_density_dr_table(int_radius)
+            displ_times_grad_z_ln_rho(ijk,1,1) = dummyx_loc(ijk,1,1) &
+                  * cos_theta * d_ln_density_dr_table(int_radius)
+          else
+            ! Cartesian components of the gravitational acceleration
+            ! integrate and multiply by rho / Kappa
+            temp_gxl(ijk,1,1) = sin_theta*cos_phi
+            temp_gyl(ijk,1,1) = sin_theta*sin_phi
+            temp_gzl(ijk,1,1) = cos_theta
+          endif
+        enddo
+#else
     do k=1,NGLLZ
       do j=1,NGLLY
         do i=1,NGLLX
@@ -245,10 +287,10 @@
             temp_gyl(i,j,k) = sin_theta*sin_phi
             temp_gzl(i,j,k) = cos_theta
           endif
-
         enddo
       enddo
     enddo
+#endif
 
     ! subroutines adapted from Deville, Fischer and Mund, High-order methods
     ! for incompressible fluid flow, Cambridge University Press (2002),
@@ -469,9 +511,26 @@
 
     ! add gravity term
     if(GRAVITY_VAL) then
+#ifdef FORCE_VECTORIZATION
+      do ijk = 1,NGLLCUBE
+        sum_terms(ijk,1,1) = sum_terms(ijk,1,1) + gravity_term(ijk,1,1)
+      enddo
+#else
       sum_terms(:,:,:) = sum_terms(:,:,:) + gravity_term(:,:,:)
+#endif
     endif
 
+#ifdef FORCE_VECTORIZATION
+! we can force vectorization using a compiler directive here because we know that there is no dependency
+! inside a given spectral element, since all the global points of a local elements are different by definition
+! (only common points between different elements can be the same)
+!IBM* ASSERT (NODEPS)
+!DIR$ IVDEP
+        do ijk = 1,NGLLCUBE
+          iglob = ibool(ijk,1,1,ispec)
+          accelfluid(iglob) = accelfluid(iglob) + sum_terms(ijk,1,1)
+        enddo
+#else
     do k=1,NGLLZ
       do j=1,NGLLY
         do i=1,NGLLX
@@ -480,6 +539,7 @@
         enddo
       enddo
     enddo
+#endif
 
     ! update rotation term with Euler scheme
     if(ROTATION_VAL) then
@@ -492,8 +552,15 @@
         B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + BETA_LDDRK(istage) * B_array_rotation_lddrk(:,:,:,ispec)
       else
         ! use the source saved above
+#ifdef FORCE_VECTORIZATION
+        do ijk = 1,NGLLCUBE
+          A_array_rotation(ijk,1,1,ispec) = A_array_rotation(ijk,1,1,ispec) + source_euler_A(ijk,1,1)
+          B_array_rotation(ijk,1,1,ispec) = B_array_rotation(ijk,1,1,ispec) + source_euler_B(ijk,1,1)
+        enddo
+#else
         A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + source_euler_A(:,:,:)
         B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + source_euler_B(:,:,:)
+#endif
       endif
     endif
 

Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/rules.mk
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/rules.mk	2013-07-16 22:17:12 UTC (rev 22631)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/rules.mk	2013-07-16 23:02:48 UTC (rev 22632)
@@ -174,8 +174,8 @@
 $O/compute_forces_outer_core_noDev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_outer_core_noDev.f90
 	${FCCOMPILE_CHECK} -c -o $O/compute_forces_outer_core_noDev.o ${FCFLAGS_f90} $S/compute_forces_outer_core_noDev.f90
 
-$O/compute_forces_outer_core_Dev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_outer_core_Dev.f90
-	${FCCOMPILE_CHECK} -c -o $O/compute_forces_outer_core_Dev.o ${FCFLAGS_f90} $S/compute_forces_outer_core_Dev.f90
+$O/compute_forces_outer_core_Dev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_outer_core_Dev.F90
+	${FCCOMPILE_CHECK} -c -o $O/compute_forces_outer_core_Dev.o ${FCFLAGS_f90} $S/compute_forces_outer_core_Dev.F90
 
 $O/compute_forces_inner_core_noDev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_inner_core_noDev.f90
 	${FCCOMPILE_CHECK} -c -o $O/compute_forces_inner_core_noDev.o ${FCFLAGS_f90} $S/compute_forces_inner_core_noDev.f90



More information about the CIG-COMMITS mailing list