[cig-commits] [commit] devel, master: added Julien Derouillat's (from Maison de la Simulation, France) OpenMP support (b119f58)

Thu Nov 6 08:17:36 PST 2014

Repository : https://github.com/geodynamics/specfem3d_globe

On branches: devel,master
Link       : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f

>---------------------------------------------------------------

commit b119f5883ffac26b0e0747ed26b3f369e58093c6
Author: Dimitri Komatitsch <komatitsch at lma.cnrs-mrs.fr>
Date:   Tue May 20 20:54:13 2014 +0200

    added Julien Derouillat's (from Maison de la Simulation, France) OpenMP support


>---------------------------------------------------------------

b119f5883ffac26b0e0747ed26b3f369e58093c6
 src/specfem3D/compute_forces_crust_mantle_Dev.F90 | 40 ++++++++++++++++++++++-
 src/specfem3D/compute_forces_inner_core_Dev.F90   |  9 +++++
 src/specfem3D/compute_forces_outer_core_Dev.F90   | 25 ++++++++++++++
 src/specfem3D/multiply_arrays_source.f90          |  6 ++++
 src/specfem3D/update_displacement_Newmark.f90     | 22 +++++++++++--
 5 files changed, 98 insertions(+), 4 deletions(-)

diff --git a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 b/src/specfem3D/compute_forces_crust_mantle_Dev.F90
index 78aa35e..baf9f72 100644
--- a/src/specfem3D/compute_forces_crust_mantle_Dev.F90
+++ b/src/specfem3D/compute_forces_crust_mantle_Dev.F90
@@ -29,7 +29,6 @@
 ! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
 #include "config.fh"
 
-
   subroutine compute_forces_crust_mantle_Dev( NSPEC,NGLOB,NSPEC_ATT, &
                                               deltat, &
                                               displ_crust_mantle, &
@@ -155,6 +154,41 @@
     num_elements = nspec_inner
   endif
 
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED(xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+!$OMP one_minus_sum_beta,epsilon_trace_over_3,c11store,c12store,c13store,c14store,c15store, &
+!$OMP c16store,c22store,c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
+!$OMP c36store,c44store,c45store,c46store,c55store,c56store,c66store,ispec_is_tiso, &
+!$OMP kappavstore,muvstore,kappahstore,muhstore,eta_anisostore,ibool,ystore,zstore, &
+!$OMP R_xx,R_yy,R_xy,R_xz,R_yz, &
+!$OMP xstore,minus_gravity_table,minus_deriv_gravity_table,density_table, &
+!$OMP displ_crust_mantle,wgll_cube,hprime_xxt,hprime_xx, &
+!$OMP vnspec, &
+!$OMP accel_crust_mantle, &
+!$OMP hprimewgll_xx,hprimewgll_xxt, &
+!$OMP alphaval,betaval, &
+!$OMP epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
+!$OMP gammaval,factor_common, &
+!$OMP iphase, &
+!$OMP phase_ispec_inner, &
+!$OMP num_elements, USE_LDDRK, &
+#ifdef FORCE_VECTORIZATION
+!$OMP wgllwgll_xy_3D, wgllwgll_xz_3D, wgllwgll_yz_3D, &
+#endif
+!$OMP R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, &
+!$OMP deltat, COMPUTE_AND_STORE_STRAIN ) &
+!$OMP PRIVATE(ispec,fac1,fac2,fac3,sum_terms,ispec_p, &
+#ifdef FORCE_VECTORIZATION
+!$OMP ijk, &
+#else
+!$OMP i,j,k, &
+#endif
+!$OMP tempx1,tempx2,tempx3, &
+!$OMP newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3, &
+!$OMP dummyx_loc,dummyy_loc,dummyz_loc,rho_s_H,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
+!$OMP iglob,epsilondev_loc)
+
+!$OMP DO SCHEDULE(GUIDED)
   do ispec_p = 1,num_elements
 
     ispec = phase_ispec_inner(ispec_p,iphase)
@@ -284,6 +318,7 @@
     ! updates acceleration
 
 #ifdef FORCE_VECTORIZATION
+!$OMP CRITICAL
 ! we can force vectorization using a compiler directive here because we know that there is no dependency
 ! inside a given spectral element, since all the global points of a local elements are different by definition
 ! (only common points between different elements can be the same)
@@ -308,6 +343,7 @@
 
 #ifdef FORCE_VECTORIZATION
     enddo
+!$OMP END CRITICAL
 #else
         enddo
       enddo
@@ -358,6 +394,8 @@
     endif
 
   enddo ! of spectral element loop NSPEC_CRUST_MANTLE
+!$OMP ENDDO
+!$OMP END PARALLEL
 
   contains
 
diff --git a/src/specfem3D/compute_forces_inner_core_Dev.F90 b/src/specfem3D/compute_forces_inner_core_Dev.F90
index 2924b71..1d4c6ae 100644
--- a/src/specfem3D/compute_forces_inner_core_Dev.F90
+++ b/src/specfem3D/compute_forces_inner_core_Dev.F90
@@ -168,6 +168,11 @@
     num_elements = nspec_inner
   endif
 
+!$OMP PARALLEL DEFAULT( NONE ) &
+!$OMP SHARED( num_elements, phase_ispec_inner, iphase, idoubling, ibool, displ_inner_core, hprime_xx, hprime_xxT, xix, xiy, xiz, etax, etay, etaz, gammax, gammay, gammaz, COMPUTE_AND_STORE_STRAIN, c11store,  c12store, c13store, c33store, c44store, one_minus_sum_beta, muvstore, kappavstore, R_xx, R_yy, R_xy, R_xz, R_yz, xstore, ystore, zstore, minus_gravity_table, minus_deriv_gravity_table, density_table, wgll_cube, hprimewgll_xxT, hprimewgll_xx, wgllwgll_yz_3D, wgllwgll_xz_3D, wgllwgll_xy_3D, accel_inner_core, USE_LDDRK, R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, vnspec, factor_common, deltat, alphaval,betaval,gammaval, epsilondev_xx,epsilondev_yy,epsilondev_xy, epsilondev_xz,epsilondev_yz, epsilon_trace_over_3 ) &
+!$OMP PRIVATE( ispec_p, ispec, iglob, dummyx_loc, dummyy_loc, dummyz_loc, tempx2, tempy2, tempz2, xixl, xiyl, xizl, etaxl, etayl, etazl, gammaxl, gammayl, gammazl, jacobianl, duxdxl, tempx1, tempx3, duxdyl, duxdzl, duydxl, tempy1, tempy3, duydyl, duydzl, tempz1, tempz3, duzdxl, duzdyl, duzdzl, duxdxl_plus_duydyl, duxdxl_plus_duzdzl, duydyl_plus_duzdzl, duxdyl_plus_duydxl, duzdxl_plus_duxdzl, duzdyl_plus_duydzl, templ, epsilondev_loc, c11l, c12l, c13l, c33l, c44l, minus_sum_beta, mul, sigma_xx, sigma_yy, sigma_zz, sigma_xy, sigma_xz, sigma_yz, kappal, lambdalplus2mul, lambdal, sigma_yx, sigma_zx, sigma_zy, radius, theta, phi, cos_theta, sin_theta, cos_phi, sin_phi, cos_theta_sq, sin_theta_sq, cos_phi_sq, sin_phi_sq, int_radius, minus_g, rho, gxl, gyl, gzl, minus_dg, minus_g_over_radius, minus_dg_plus_g_over_radius, Hxxl, Hyyl, Hzzl, Hxyl, Hxzl, Hyzl, sx_l, sy_l, sz_l, factor, rho_s_H, newtempx2, newtempy2, newtempz2, fac1, fac2, fac3, sum_terms, newtempx1, newtempx3 , newtempy1, newtempy3, newtempz1, newtempz3, R_xx_val, R_yy_val)
+
+!$OMP DO SCHEDULE(GUIDED)
   do ispec_p = 1,num_elements
 
     ispec = phase_ispec_inner(ispec_p,iphase)
@@ -685,6 +690,7 @@
 
       ! sum contributions from each element to the global mesh and add gravity terms
 #ifdef FORCE_VECTORIZATION
+!$OMP CRITICAL
 ! we can force vectorization using a compiler directive here because we know that there is no dependency
 ! inside a given spectral element, since all the global points of a local elements are different by definition
 ! (only common points between different elements can be the same)
@@ -708,6 +714,7 @@
 
 #ifdef FORCE_VECTORIZATION
       enddo
+!$OMP END CRITICAL
 #else
           enddo
         enddo
@@ -760,6 +767,8 @@
     endif ! end of test to exclude fictitious elements in central cube
 
   enddo ! of spectral element loop
+!$OMP ENDDO
+!$OMP END PARALLEL
 
   contains
 
diff --git a/src/specfem3D/compute_forces_outer_core_Dev.F90 b/src/specfem3D/compute_forces_outer_core_Dev.F90
index 8297247..93157b8 100644
--- a/src/specfem3D/compute_forces_outer_core_Dev.F90
+++ b/src/specfem3D/compute_forces_outer_core_Dev.F90
@@ -137,6 +137,27 @@
     num_elements = nspec_inner
   endif
 
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED( &
+!$OMP num_elements, phase_ispec_inner, iphase, ibool, displfluid, xstore, ystore, zstore, &
+!$OMP d_ln_density_dr_table, hprime_xx, hprime_xxT, xix, xiy, xiz,  etax, etay, etaz, &
+!$OMP gammax, gammay, gammaz, deltat, two_omega_earth, timeval, A_array_rotation, B_array_rotation,   &
+!$OMP minus_rho_g_over_kappa_fluid, wgll_cube, MOVIE_VOLUME, hprimewgll_xxT, hprimewgll_xx, &
+!$OMP wgllwgll_yz_3D, wgllwgll_xz_3D, wgllwgll_xy_3D, accelfluid, USE_LDDRK, A_array_rotation_lddrk, istage, B_array_rotation_lddrk, div_displfluid ) &
+!$OMP PRIVATE( &
+!$OMP ispec_p, ispec, iglob, dummyx_loc, radius, theta, phi, &
+!$OMP cos_theta, sin_theta, cos_phi, sin_phi, int_radius, &
+!$OMP displ_times_grad_x_ln_rho, displ_times_grad_y_ln_rho, displ_times_grad_z_ln_rho, &
+!$OMP temp_gxl, temp_gyl, temp_gzl, tempx2, &
+!$OMP xixl, xiyl, xizl, etaxl, etayl, etazl, gammaxl, gammayl, gammazl, jacobianl, &
+!$OMP dpotentialdxl, tempx1, tempx3, dpotentialdyl, dpotentialdzl, two_omega_deltat, cos_two_omega_t, &
+!$OMP sin_two_omega_t, source_euler_A, source_euler_B, A_rotation, B_rotation, ux_rotation, uy_rotation, &
+!$OMP dpotentialdx_with_rot, dpotentialdy_with_rot, gxl, gyl, gzl, gravity_term, &
+!$OMP sum_terms, newtempx1, newtempx3, &
+!$OMP newtempx2   &
+!$OMP )
+
+!$OMP DO SCHEDULE(GUIDED)
   do ispec_p = 1,num_elements
 
     ispec = phase_ispec_inner(ispec_p,iphase)
@@ -368,6 +389,7 @@
     ! updates acceleration
 
 #ifdef FORCE_VECTORIZATION
+!$OMP CRITICAL
 ! we can force vectorization using a compiler directive here because we know that there is no dependency
 ! inside a given spectral element, since all the global points of a local elements are different by definition
 ! (only common points between different elements can be the same)
@@ -386,6 +408,7 @@
 
 #ifdef FORCE_VECTORIZATION
     enddo
+!$OMP END CRITICAL
 #else
         enddo
       enddo
@@ -426,6 +449,8 @@
     endif
 
   enddo   ! spectral element loop
+!$OMP ENDDO
+!$OMP END PARALLEL
 
   contains
 
diff --git a/src/specfem3D/multiply_arrays_source.f90 b/src/specfem3D/multiply_arrays_source.f90
index 14b76b3..208fa5a 100644
--- a/src/specfem3D/multiply_arrays_source.f90
+++ b/src/specfem3D/multiply_arrays_source.f90
@@ -75,11 +75,17 @@
   ! updates acceleration w/ rotation in elastic region
 
   ! see input call, differs for corrected mass matrices for rmassx,rmassy,rmassz
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED(NGLOB, accel, rmassx, rmassy, rmassz, two_omega_earth, veloc) &
+!$OMP PRIVATE(i)
+!$OMP DO SCHEDULE(GUIDED)
   do i=1,NGLOB
     accel(1,i) = accel(1,i)*rmassx(i) + two_omega_earth*veloc(2,i)
     accel(2,i) = accel(2,i)*rmassy(i) - two_omega_earth*veloc(1,i)
     accel(3,i) = accel(3,i)*rmassz(i)
   enddo
+!$OMP enddo
+!$OMP END PARALLEL
 
   end subroutine multiply_accel_elastic
 
diff --git a/src/specfem3D/update_displacement_Newmark.f90 b/src/specfem3D/update_displacement_Newmark.f90
index 979a8d6..07a887d 100644
--- a/src/specfem3D/update_displacement_Newmark.f90
+++ b/src/specfem3D/update_displacement_Newmark.f90
@@ -198,12 +198,20 @@
   ! local parameters
   integer :: i
 
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED( NGLOB, displ, veloc, accel, &
+!$OMP deltat, deltatsqover2, deltatover2 ) &
+!$OMP PRIVATE(i)
+
   ! Newmark time scheme update
+!$OMP DO SCHEDULE(GUIDED)
   do i=1,NGLOB
     displ(i) = displ(i) + deltat * veloc(i) + deltatsqover2 * accel(i)
     veloc(i) = veloc(i) + deltatover2 * accel(i)
     accel(i) = 0._CUSTOM_REAL
   enddo
+!$OMP ENDDO
+!$OMP END PARALLEL
 
   end subroutine update_displ_acoustic
 
@@ -280,9 +288,6 @@
   ! velocity potential
   real(kind=CUSTOM_REAL), dimension(NGLOB) :: veloc_outer_core,accel_outer_core
 
-  ! mass matrix
-!!!!!  real(kind=CUSTOM_REAL), dimension(NGLOB) :: rmass_outer_core
-
   real(kind=CUSTOM_REAL) :: deltatover2
 
   ! local parameters
@@ -393,17 +398,26 @@
   !   - inner core region
   !         needs both, acceleration update & velocity corrector terms
 
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED( NGLOB_CM, veloc_crust_mantle, deltatover2, &
+!$OMP accel_crust_mantle, NGLOB_IC, veloc_inner_core, accel_inner_core) &
+!$OMP PRIVATE(i)
+
   if(FORCE_VECTORIZATION_VAL) then
 
     ! crust/mantle
+!$OMP DO SCHEDULE(GUIDED)
     do i=1,NGLOB_CM * NDIM
       veloc_crust_mantle(i,1) = veloc_crust_mantle(i,1) + deltatover2*accel_crust_mantle(i,1)
     enddo
+!$OMP ENDDO
 
+!$OMP DO SCHEDULE(GUIDED)
     ! inner core
     do i=1,NGLOB_IC * NDIM
       veloc_inner_core(i,1) = veloc_inner_core(i,1) + deltatover2*accel_inner_core(i,1)
     enddo
+!$OMP ENDDO
 
   else
 
@@ -419,5 +433,7 @@
 
   endif
 
+!$OMP END PARALLEL
+
   end subroutine update_veloc_elastic