[cig-commits] [commit] devel, master: added Vittorio Ruggiero's additional OpenMP support, and in particular added a new compiler option -DUSE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL to use OpenMP ATOMIC statements for some critical loops instead of OpenMP CRITICAL regions. (70b3411)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Thu Nov 6 08:18:36 PST 2014
Repository : https://github.com/geodynamics/specfem3d_globe
On branches: devel,master
Link : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f
>---------------------------------------------------------------
commit 70b3411f037d030b5e98d3b4a38fe63541f764f5
Author: Dimitri Komatitsch <komatitsch at lma.cnrs-mrs.fr>
Date: Fri May 23 18:42:20 2014 +0200
added Vittorio Ruggiero's additional OpenMP support, and in particular added a new compiler option -DUSE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL to use OpenMP ATOMIC statements for some critical loops instead of OpenMP CRITICAL regions.
>---------------------------------------------------------------
70b3411f037d030b5e98d3b4a38fe63541f764f5
flags.guess | 7 +++++--
src/specfem3D/compute_forces_crust_mantle_Dev.F90 | 15 ++++++++++++++-
src/specfem3D/compute_forces_inner_core_Dev.F90 | 23 +++++++++++++++++++----
src/specfem3D/compute_forces_outer_core_Dev.F90 | 9 ++++++++-
src/specfem3D/update_displacement_Newmark.f90 | 8 ++++++++
5 files changed, 54 insertions(+), 8 deletions(-)
diff --git a/flags.guess b/flags.guess
index 68f9f53..1092c57 100644
--- a/flags.guess
+++ b/flags.guess
@@ -2,14 +2,17 @@
# Attempt to guess suitable flags for the Fortran compiler.
-# can add -DUSE_SERIAL_CASCADE_FOR_IOs to the compiler options to make the mesher output mesh data
+# one can add -DUSE_SERIAL_CASCADE_FOR_IOs to the compiler options to make the mesher output mesh data
# to the disk for one MPI slice after the other, and to make the solver do the same thing when reading the files back from disk.
-# can also add -DFORCE_VECTORIZATION to force vectorization and unrolling of some critical loops, however this breaks
+# one can also add -DFORCE_VECTORIZATION to force vectorization and unrolling of some critical loops, however this breaks
# range checking options at run time (for instance -check all for Intel ifort) and, more importantly, modern compilers
# vectorize the SPECFEM3D_GLOBE code very well and in practice this option only makes the code 3% to 5% faster in the best case,
# therefore we suggest not activating it.
+# for the OpenMP version, one can add -DUSE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL to use OpenMP ATOMIC statements
+# for some critical loops instead of OpenMP CRITICAL regions.
+
###########################################################################################################################
###########################################################################################################################
###########################################################################################################################
diff --git a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 b/src/specfem3D/compute_forces_crust_mantle_Dev.F90
index 7356c61..794a294 100644
--- a/src/specfem3D/compute_forces_crust_mantle_Dev.F90
+++ b/src/specfem3D/compute_forces_crust_mantle_Dev.F90
@@ -318,7 +318,9 @@
! updates acceleration
#ifdef FORCE_VECTORIZATION
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
!$OMP CRITICAL
+#endif
! we can force vectorization using a compiler directive here because we know that there is no dependency
! inside a given spectral element, since all the global points of a local elements are different by definition
! (only common points between different elements can be the same)
@@ -337,13 +339,24 @@
! do NOT use array syntax ":" for the three statements below otherwise most compilers
! will not be able to vectorize the outer loop
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(INDEX_IJK,1)
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(INDEX_IJK,2)
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(INDEX_IJK,3)
#ifdef FORCE_VECTORIZATION
enddo
-!$OMP END CRITICAL
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP END CRITICAL
+#endif
#else
enddo
enddo
diff --git a/src/specfem3D/compute_forces_inner_core_Dev.F90 b/src/specfem3D/compute_forces_inner_core_Dev.F90
index 1e5993e..3239f01 100644
--- a/src/specfem3D/compute_forces_inner_core_Dev.F90
+++ b/src/specfem3D/compute_forces_inner_core_Dev.F90
@@ -29,8 +29,6 @@
! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
#include "config.fh"
-
-
subroutine compute_forces_inner_core_Dev( NSPEC,NGLOB,NSPEC_ATT, &
deltat, &
displ_inner_core, &
@@ -185,7 +183,11 @@
!$OMP sin_phi, cos_theta_sq, sin_theta_sq, cos_phi_sq, sin_phi_sq, int_radius, minus_g, rho, gxl, gyl, gzl, minus_dg, &
!$OMP minus_g_over_radius, minus_dg_plus_g_over_radius, Hxxl, Hyyl, Hzzl, Hxyl, Hxzl, Hyzl, sx_l, sy_l, sz_l, &
!$OMP factor, rho_s_H, newtempx2, newtempy2, newtempz2, fac1, fac2, fac3, sum_terms, newtempx1, newtempx3 , newtempy1, &
-!$OMP newtempy3, newtempz1, newtempz3, R_xx_val, R_yy_val)
+!$OMP newtempy3, newtempz1, newtempz3 &
+#ifdef FORCE_VECTORIZATION
+!$OMP ,R_xx_val, R_yy_val &
+#endif
+!$OMP )
!$OMP DO SCHEDULE(GUIDED)
do ispec_p = 1,num_elements
@@ -705,7 +707,9 @@
! sum contributions from each element to the global mesh and add gravity terms
#ifdef FORCE_VECTORIZATION
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
!$OMP CRITICAL
+#endif
! we can force vectorization using a compiler directive here because we know that there is no dependency
! inside a given spectral element, since all the global points of a local elements are different by definition
! (only common points between different elements can be the same)
@@ -723,13 +727,24 @@
iglob = ibool(INDEX_IJK,ispec)
! do NOT use array syntax ":" for the three statements below
! otherwise most compilers will not be able to vectorize the outer loop
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(INDEX_IJK,1)
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(INDEX_IJK,2)
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(INDEX_IJK,3)
#ifdef FORCE_VECTORIZATION
enddo
-!$OMP END CRITICAL
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP END CRITICAL
+#endif
#else
enddo
enddo
diff --git a/src/specfem3D/compute_forces_outer_core_Dev.F90 b/src/specfem3D/compute_forces_outer_core_Dev.F90
index c1636ad..c4a84e7 100644
--- a/src/specfem3D/compute_forces_outer_core_Dev.F90
+++ b/src/specfem3D/compute_forces_outer_core_Dev.F90
@@ -388,7 +388,9 @@
! updates acceleration
#ifdef FORCE_VECTORIZATION
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
!$OMP CRITICAL
+#endif
! we can force vectorization using a compiler directive here because we know that there is no dependency
! inside a given spectral element, since all the global points of a local elements are different by definition
! (only common points between different elements can be the same)
@@ -403,11 +405,16 @@
do i=1,NGLLX
#endif
iglob = ibool(INDEX_IJK,ispec)
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
accelfluid(iglob) = accelfluid(iglob) + sum_terms(INDEX_IJK)
#ifdef FORCE_VECTORIZATION
enddo
-!$OMP END CRITICAL
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP END CRITICAL
+#endif
#else
enddo
enddo
diff --git a/src/specfem3D/update_displacement_Newmark.f90 b/src/specfem3D/update_displacement_Newmark.f90
index d955fc3..9b38d7f 100644
--- a/src/specfem3D/update_displacement_Newmark.f90
+++ b/src/specfem3D/update_displacement_Newmark.f90
@@ -161,11 +161,19 @@
! Newmark time scheme update
if(FORCE_VECTORIZATION_VAL) then
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED( NGLOB, displ, veloc, accel, &
+!$OMP deltat, deltatsqover2, deltatover2 ) &
+!$OMP PRIVATE(i)
+
+!$OMP DO SCHEDULE(GUIDED)
do i=1,NGLOB * NDIM
displ(i,1) = displ(i,1) + deltat * veloc(i,1) + deltatsqover2 * accel(i,1)
veloc(i,1) = veloc(i,1) + deltatover2 * accel(i,1)
accel(i,1) = 0._CUSTOM_REAL
enddo
+!$OMP enddo
+!$OMP END PARALLEL
else
More information about the CIG-COMMITS
mailing list