[cig-commits] [commit] devel, master: added Vittorio Ruggiero's additional OpenMP support, and in particular added a new compiler option -DUSE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL to use OpenMP ATOMIC statements for some critical loops instead of OpenMP CRITICAL regions. (70b3411)

Thu Nov 6 08:18:36 PST 2014

Repository : https://github.com/geodynamics/specfem3d_globe

On branches: devel,master
Link       : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f

>---------------------------------------------------------------

commit 70b3411f037d030b5e98d3b4a38fe63541f764f5
Author: Dimitri Komatitsch <komatitsch at lma.cnrs-mrs.fr>
Date:   Fri May 23 18:42:20 2014 +0200

    added Vittorio Ruggiero's additional OpenMP support, and in particular added a new compiler option -DUSE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL to use OpenMP ATOMIC statements for some critical loops instead of OpenMP CRITICAL regions.


>---------------------------------------------------------------

70b3411f037d030b5e98d3b4a38fe63541f764f5
 flags.guess                                       |  7 +++++--
 src/specfem3D/compute_forces_crust_mantle_Dev.F90 | 15 ++++++++++++++-
 src/specfem3D/compute_forces_inner_core_Dev.F90   | 23 +++++++++++++++++++----
 src/specfem3D/compute_forces_outer_core_Dev.F90   |  9 ++++++++-
 src/specfem3D/update_displacement_Newmark.f90     |  8 ++++++++
 5 files changed, 54 insertions(+), 8 deletions(-)

diff --git a/flags.guess b/flags.guess
index 68f9f53..1092c57 100644
--- a/flags.guess
+++ b/flags.guess
@@ -2,14 +2,17 @@
 
 # Attempt to guess suitable flags for the Fortran compiler.
 
-# can add -DUSE_SERIAL_CASCADE_FOR_IOs to the compiler options to make the mesher output mesh data
+# one can add -DUSE_SERIAL_CASCADE_FOR_IOs to the compiler options to make the mesher output mesh data
 # to the disk for one MPI slice after the other, and to make the solver do the same thing when reading the files back from disk.
 
-# can also add -DFORCE_VECTORIZATION to force vectorization and unrolling of some critical loops, however this breaks
+# one can also add -DFORCE_VECTORIZATION to force vectorization and unrolling of some critical loops, however this breaks
 # range checking options at run time (for instance -check all for Intel ifort) and, more importantly, modern compilers
 # vectorize the SPECFEM3D_GLOBE code very well and in practice this option only makes the code 3% to 5% faster in the best case,
 # therefore we suggest not activating it.
 
+# for the OpenMP version, one can add -DUSE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL to use OpenMP ATOMIC statements
+# for some critical loops instead of OpenMP CRITICAL regions.
+
 ###########################################################################################################################
 ###########################################################################################################################
 ###########################################################################################################################
diff --git a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 b/src/specfem3D/compute_forces_crust_mantle_Dev.F90
index 7356c61..794a294 100644
--- a/src/specfem3D/compute_forces_crust_mantle_Dev.F90
+++ b/src/specfem3D/compute_forces_crust_mantle_Dev.F90
@@ -318,7 +318,9 @@
     ! updates acceleration
 
 #ifdef FORCE_VECTORIZATION
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
 !$OMP CRITICAL
+#endif
 ! we can force vectorization using a compiler directive here because we know that there is no dependency
 ! inside a given spectral element, since all the global points of a local elements are different by definition
 ! (only common points between different elements can be the same)
@@ -337,13 +339,24 @@
 
           ! do NOT use array syntax ":" for the three statements below otherwise most compilers
           ! will not be able to vectorize the outer loop
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
           accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(INDEX_IJK,1)
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
           accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(INDEX_IJK,2)
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
           accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(INDEX_IJK,3)
 
 #ifdef FORCE_VECTORIZATION
     enddo
-!$OMP END CRITICAL
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP END CRITICAL
+#endif
 #else
         enddo
       enddo
diff --git a/src/specfem3D/compute_forces_inner_core_Dev.F90 b/src/specfem3D/compute_forces_inner_core_Dev.F90
index 1e5993e..3239f01 100644
--- a/src/specfem3D/compute_forces_inner_core_Dev.F90
+++ b/src/specfem3D/compute_forces_inner_core_Dev.F90
@@ -29,8 +29,6 @@
 ! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
 #include "config.fh"
 
-
-
   subroutine compute_forces_inner_core_Dev( NSPEC,NGLOB,NSPEC_ATT, &
                                             deltat, &
                                             displ_inner_core, &
@@ -185,7 +183,11 @@
 !$OMP sin_phi, cos_theta_sq, sin_theta_sq, cos_phi_sq, sin_phi_sq, int_radius, minus_g, rho, gxl, gyl, gzl, minus_dg, &
 !$OMP minus_g_over_radius, minus_dg_plus_g_over_radius, Hxxl, Hyyl, Hzzl, Hxyl, Hxzl, Hyzl, sx_l, sy_l, sz_l, &
 !$OMP factor, rho_s_H, newtempx2, newtempy2, newtempz2, fac1, fac2, fac3, sum_terms, newtempx1, newtempx3 , newtempy1, &
-!$OMP newtempy3, newtempz1, newtempz3, R_xx_val, R_yy_val)
+!$OMP newtempy3, newtempz1, newtempz3 &
+#ifdef FORCE_VECTORIZATION
+!$OMP ,R_xx_val, R_yy_val &
+#endif
+!$OMP )
 
 !$OMP DO SCHEDULE(GUIDED)
   do ispec_p = 1,num_elements
@@ -705,7 +707,9 @@
 
       ! sum contributions from each element to the global mesh and add gravity terms
 #ifdef FORCE_VECTORIZATION
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
 !$OMP CRITICAL
+#endif
 ! we can force vectorization using a compiler directive here because we know that there is no dependency
 ! inside a given spectral element, since all the global points of a local elements are different by definition
 ! (only common points between different elements can be the same)
@@ -723,13 +727,24 @@
             iglob = ibool(INDEX_IJK,ispec)
             ! do NOT use array syntax ":" for the three statements below
             ! otherwise most compilers will not be able to vectorize the outer loop
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
             accel_inner_core(1,iglob) = accel_inner_core(1,iglob) + sum_terms(INDEX_IJK,1)
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
             accel_inner_core(2,iglob) = accel_inner_core(2,iglob) + sum_terms(INDEX_IJK,2)
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
             accel_inner_core(3,iglob) = accel_inner_core(3,iglob) + sum_terms(INDEX_IJK,3)
 
 #ifdef FORCE_VECTORIZATION
       enddo
-!$OMP END CRITICAL
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP END CRITICAL
+#endif
 #else
           enddo
         enddo
diff --git a/src/specfem3D/compute_forces_outer_core_Dev.F90 b/src/specfem3D/compute_forces_outer_core_Dev.F90
index c1636ad..c4a84e7 100644
--- a/src/specfem3D/compute_forces_outer_core_Dev.F90
+++ b/src/specfem3D/compute_forces_outer_core_Dev.F90
@@ -388,7 +388,9 @@
     ! updates acceleration
 
 #ifdef FORCE_VECTORIZATION
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
 !$OMP CRITICAL
+#endif
 ! we can force vectorization using a compiler directive here because we know that there is no dependency
 ! inside a given spectral element, since all the global points of a local elements are different by definition
 ! (only common points between different elements can be the same)
@@ -403,11 +405,16 @@
         do i=1,NGLLX
 #endif
           iglob = ibool(INDEX_IJK,ispec)
+#ifdef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP ATOMIC
+#endif
           accelfluid(iglob) = accelfluid(iglob) + sum_terms(INDEX_IJK)
 
 #ifdef FORCE_VECTORIZATION
     enddo
-!$OMP END CRITICAL
+#ifndef USE_OPENMP_ATOMIC_INSTEAD_OF_CRITICAL
+!$OMP END CRITICAL
+#endif
 #else
         enddo
       enddo
diff --git a/src/specfem3D/update_displacement_Newmark.f90 b/src/specfem3D/update_displacement_Newmark.f90
index d955fc3..9b38d7f 100644
--- a/src/specfem3D/update_displacement_Newmark.f90
+++ b/src/specfem3D/update_displacement_Newmark.f90
@@ -161,11 +161,19 @@
   ! Newmark time scheme update
   if(FORCE_VECTORIZATION_VAL) then
 
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED( NGLOB, displ, veloc, accel, &
+!$OMP deltat, deltatsqover2, deltatover2 ) &
+!$OMP PRIVATE(i)
+
+!$OMP DO SCHEDULE(GUIDED)
     do i=1,NGLOB * NDIM
       displ(i,1) = displ(i,1) + deltat * veloc(i,1) + deltatsqover2 * accel(i,1)
       veloc(i,1) = veloc(i,1) + deltatover2 * accel(i,1)
       accel(i,1) = 0._CUSTOM_REAL
     enddo
+!$OMP enddo
+!$OMP END PARALLEL
 
   else