[cig-commits] [commit] devel, master: added Julien Derouillat's (from Maison de la Simulation, France) OpenMP support (b119f58)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Thu Nov 6 08:17:36 PST 2014
Repository : https://github.com/geodynamics/specfem3d_globe
On branches: devel,master
Link : https://github.com/geodynamics/specfem3d_globe/compare/bc58e579b3b0838a0968725a076f5904845437ca...be63f20cbb6f462104e949894dbe205d2398cd7f
>---------------------------------------------------------------
commit b119f5883ffac26b0e0747ed26b3f369e58093c6
Author: Dimitri Komatitsch <komatitsch at lma.cnrs-mrs.fr>
Date: Tue May 20 20:54:13 2014 +0200
added Julien Derouillat's (from Maison de la Simulation, France) OpenMP support
>---------------------------------------------------------------
b119f5883ffac26b0e0747ed26b3f369e58093c6
src/specfem3D/compute_forces_crust_mantle_Dev.F90 | 40 ++++++++++++++++++++++-
src/specfem3D/compute_forces_inner_core_Dev.F90 | 9 +++++
src/specfem3D/compute_forces_outer_core_Dev.F90 | 25 ++++++++++++++
src/specfem3D/multiply_arrays_source.f90 | 6 ++++
src/specfem3D/update_displacement_Newmark.f90 | 22 +++++++++++--
5 files changed, 98 insertions(+), 4 deletions(-)
diff --git a/src/specfem3D/compute_forces_crust_mantle_Dev.F90 b/src/specfem3D/compute_forces_crust_mantle_Dev.F90
index 78aa35e..baf9f72 100644
--- a/src/specfem3D/compute_forces_crust_mantle_Dev.F90
+++ b/src/specfem3D/compute_forces_crust_mantle_Dev.F90
@@ -29,7 +29,6 @@
! and macros INDEX_IJK, DO_LOOP_IJK, ENDDO_LOOP_IJK defined in config.fh
#include "config.fh"
-
subroutine compute_forces_crust_mantle_Dev( NSPEC,NGLOB,NSPEC_ATT, &
deltat, &
displ_crust_mantle, &
@@ -155,6 +154,41 @@
num_elements = nspec_inner
endif
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED(xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+!$OMP one_minus_sum_beta,epsilon_trace_over_3,c11store,c12store,c13store,c14store,c15store, &
+!$OMP c16store,c22store,c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
+!$OMP c36store,c44store,c45store,c46store,c55store,c56store,c66store,ispec_is_tiso, &
+!$OMP kappavstore,muvstore,kappahstore,muhstore,eta_anisostore,ibool,ystore,zstore, &
+!$OMP R_xx,R_yy,R_xy,R_xz,R_yz, &
+!$OMP xstore,minus_gravity_table,minus_deriv_gravity_table,density_table, &
+!$OMP displ_crust_mantle,wgll_cube,hprime_xxt,hprime_xx, &
+!$OMP vnspec, &
+!$OMP accel_crust_mantle, &
+!$OMP hprimewgll_xx,hprimewgll_xxt, &
+!$OMP alphaval,betaval, &
+!$OMP epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz, &
+!$OMP gammaval,factor_common, &
+!$OMP iphase, &
+!$OMP phase_ispec_inner, &
+!$OMP num_elements, USE_LDDRK, &
+#ifdef FORCE_VECTORIZATION
+!$OMP wgllwgll_xy_3D, wgllwgll_xz_3D, wgllwgll_yz_3D, &
+#endif
+!$OMP R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, &
+!$OMP deltat, COMPUTE_AND_STORE_STRAIN ) &
+!$OMP PRIVATE(ispec,fac1,fac2,fac3,sum_terms,ispec_p, &
+#ifdef FORCE_VECTORIZATION
+!$OMP ijk, &
+#else
+!$OMP i,j,k, &
+#endif
+!$OMP tempx1,tempx2,tempx3, &
+!$OMP newtempx1,newtempx2,newtempx3,newtempy1,newtempy2,newtempy3,newtempz1,newtempz2,newtempz3, &
+!$OMP dummyx_loc,dummyy_loc,dummyz_loc,rho_s_H,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3, &
+!$OMP iglob,epsilondev_loc)
+
+!$OMP DO SCHEDULE(GUIDED)
do ispec_p = 1,num_elements
ispec = phase_ispec_inner(ispec_p,iphase)
@@ -284,6 +318,7 @@
! updates acceleration
#ifdef FORCE_VECTORIZATION
+!$OMP CRITICAL
! we can force vectorization using a compiler directive here because we know that there is no dependency
! inside a given spectral element, since all the global points of a local elements are different by definition
! (only common points between different elements can be the same)
@@ -308,6 +343,7 @@
#ifdef FORCE_VECTORIZATION
enddo
+!$OMP END CRITICAL
#else
enddo
enddo
@@ -358,6 +394,8 @@
endif
enddo ! of spectral element loop NSPEC_CRUST_MANTLE
+!$OMP ENDDO
+!$OMP END PARALLEL
contains
diff --git a/src/specfem3D/compute_forces_inner_core_Dev.F90 b/src/specfem3D/compute_forces_inner_core_Dev.F90
index 2924b71..1d4c6ae 100644
--- a/src/specfem3D/compute_forces_inner_core_Dev.F90
+++ b/src/specfem3D/compute_forces_inner_core_Dev.F90
@@ -168,6 +168,11 @@
num_elements = nspec_inner
endif
+!$OMP PARALLEL DEFAULT( NONE ) &
+!$OMP SHARED( num_elements, phase_ispec_inner, iphase, idoubling, ibool, displ_inner_core, hprime_xx, hprime_xxT, xix, xiy, xiz, etax, etay, etaz, gammax, gammay, gammaz, COMPUTE_AND_STORE_STRAIN, c11store, c12store, c13store, c33store, c44store, one_minus_sum_beta, muvstore, kappavstore, R_xx, R_yy, R_xy, R_xz, R_yz, xstore, ystore, zstore, minus_gravity_table, minus_deriv_gravity_table, density_table, wgll_cube, hprimewgll_xxT, hprimewgll_xx, wgllwgll_yz_3D, wgllwgll_xz_3D, wgllwgll_xy_3D, accel_inner_core, USE_LDDRK, R_xx_lddrk,R_yy_lddrk,R_xy_lddrk,R_xz_lddrk,R_yz_lddrk, vnspec, factor_common, deltat, alphaval,betaval,gammaval, epsilondev_xx,epsilondev_yy,epsilondev_xy, epsilondev_xz,epsilondev_yz, epsilon_trace_over_3 ) &
+!$OMP PRIVATE( ispec_p, ispec, iglob, dummyx_loc, dummyy_loc, dummyz_loc, tempx2, tempy2, tempz2, xixl, xiyl, xizl, etaxl, etayl, etazl, gammaxl, gammayl, gammazl, jacobianl, duxdxl, tempx1, tempx3, duxdyl, duxdzl, duydxl, tempy1, tempy3, duydyl, duydzl, tempz1, tempz3, duzdxl, duzdyl, duzdzl, duxdxl_plus_duydyl, duxdxl_plus_duzdzl, duydyl_plus_duzdzl, duxdyl_plus_duydxl, duzdxl_plus_duxdzl, duzdyl_plus_duydzl, templ, epsilondev_loc, c11l, c12l, c13l, c33l, c44l, minus_sum_beta, mul, sigma_xx, sigma_yy, sigma_zz, sigma_xy, sigma_xz, sigma_yz, kappal, lambdalplus2mul, lambdal, sigma_yx, sigma_zx, sigma_zy, radius, theta, phi, cos_theta, sin_theta, cos_phi, sin_phi, cos_theta_sq, sin_theta_sq, cos_phi_sq, sin_phi_sq, int_radius, minus_g, rho, gxl, gyl, gzl, minus_dg, minus_g_over_radius, minus_dg_plus_g_over_radius, Hxxl, Hyyl, Hzzl, Hxyl, Hxzl, Hyzl, sx_l, sy_l, sz_l, factor, rho_s_H, newtempx2, newtempy2, newtempz2, fac1, fac2, fac3, sum_terms, newtempx1, newtempx3 , newtempy1, newtempy3, newtempz1, newtempz3, R_xx_val, R_yy_val)
+
+!$OMP DO SCHEDULE(GUIDED)
do ispec_p = 1,num_elements
ispec = phase_ispec_inner(ispec_p,iphase)
@@ -685,6 +690,7 @@
! sum contributions from each element to the global mesh and add gravity terms
#ifdef FORCE_VECTORIZATION
+!$OMP CRITICAL
! we can force vectorization using a compiler directive here because we know that there is no dependency
! inside a given spectral element, since all the global points of a local elements are different by definition
! (only common points between different elements can be the same)
@@ -708,6 +714,7 @@
#ifdef FORCE_VECTORIZATION
enddo
+!$OMP END CRITICAL
#else
enddo
enddo
@@ -760,6 +767,8 @@
endif ! end of test to exclude fictitious elements in central cube
enddo ! of spectral element loop
+!$OMP ENDDO
+!$OMP END PARALLEL
contains
diff --git a/src/specfem3D/compute_forces_outer_core_Dev.F90 b/src/specfem3D/compute_forces_outer_core_Dev.F90
index 8297247..93157b8 100644
--- a/src/specfem3D/compute_forces_outer_core_Dev.F90
+++ b/src/specfem3D/compute_forces_outer_core_Dev.F90
@@ -137,6 +137,27 @@
num_elements = nspec_inner
endif
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED( &
+!$OMP num_elements, phase_ispec_inner, iphase, ibool, displfluid, xstore, ystore, zstore, &
+!$OMP d_ln_density_dr_table, hprime_xx, hprime_xxT, xix, xiy, xiz, etax, etay, etaz, &
+!$OMP gammax, gammay, gammaz, deltat, two_omega_earth, timeval, A_array_rotation, B_array_rotation, &
+!$OMP minus_rho_g_over_kappa_fluid, wgll_cube, MOVIE_VOLUME, hprimewgll_xxT, hprimewgll_xx, &
+!$OMP wgllwgll_yz_3D, wgllwgll_xz_3D, wgllwgll_xy_3D, accelfluid, USE_LDDRK, A_array_rotation_lddrk, istage, B_array_rotation_lddrk, div_displfluid ) &
+!$OMP PRIVATE( &
+!$OMP ispec_p, ispec, iglob, dummyx_loc, radius, theta, phi, &
+!$OMP cos_theta, sin_theta, cos_phi, sin_phi, int_radius, &
+!$OMP displ_times_grad_x_ln_rho, displ_times_grad_y_ln_rho, displ_times_grad_z_ln_rho, &
+!$OMP temp_gxl, temp_gyl, temp_gzl, tempx2, &
+!$OMP xixl, xiyl, xizl, etaxl, etayl, etazl, gammaxl, gammayl, gammazl, jacobianl, &
+!$OMP dpotentialdxl, tempx1, tempx3, dpotentialdyl, dpotentialdzl, two_omega_deltat, cos_two_omega_t, &
+!$OMP sin_two_omega_t, source_euler_A, source_euler_B, A_rotation, B_rotation, ux_rotation, uy_rotation, &
+!$OMP dpotentialdx_with_rot, dpotentialdy_with_rot, gxl, gyl, gzl, gravity_term, &
+!$OMP sum_terms, newtempx1, newtempx3, &
+!$OMP newtempx2 &
+!$OMP )
+
+!$OMP DO SCHEDULE(GUIDED)
do ispec_p = 1,num_elements
ispec = phase_ispec_inner(ispec_p,iphase)
@@ -368,6 +389,7 @@
! updates acceleration
#ifdef FORCE_VECTORIZATION
+!$OMP CRITICAL
! we can force vectorization using a compiler directive here because we know that there is no dependency
! inside a given spectral element, since all the global points of a local elements are different by definition
! (only common points between different elements can be the same)
@@ -386,6 +408,7 @@
#ifdef FORCE_VECTORIZATION
enddo
+!$OMP END CRITICAL
#else
enddo
enddo
@@ -426,6 +449,8 @@
endif
enddo ! spectral element loop
+!$OMP ENDDO
+!$OMP END PARALLEL
contains
diff --git a/src/specfem3D/multiply_arrays_source.f90 b/src/specfem3D/multiply_arrays_source.f90
index 14b76b3..208fa5a 100644
--- a/src/specfem3D/multiply_arrays_source.f90
+++ b/src/specfem3D/multiply_arrays_source.f90
@@ -75,11 +75,17 @@
! updates acceleration w/ rotation in elastic region
! see input call, differs for corrected mass matrices for rmassx,rmassy,rmassz
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED(NGLOB, accel, rmassx, rmassy, rmassz, two_omega_earth, veloc) &
+!$OMP PRIVATE(i)
+!$OMP DO SCHEDULE(GUIDED)
do i=1,NGLOB
accel(1,i) = accel(1,i)*rmassx(i) + two_omega_earth*veloc(2,i)
accel(2,i) = accel(2,i)*rmassy(i) - two_omega_earth*veloc(1,i)
accel(3,i) = accel(3,i)*rmassz(i)
enddo
+!$OMP enddo
+!$OMP END PARALLEL
end subroutine multiply_accel_elastic
diff --git a/src/specfem3D/update_displacement_Newmark.f90 b/src/specfem3D/update_displacement_Newmark.f90
index 979a8d6..07a887d 100644
--- a/src/specfem3D/update_displacement_Newmark.f90
+++ b/src/specfem3D/update_displacement_Newmark.f90
@@ -198,12 +198,20 @@
! local parameters
integer :: i
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED( NGLOB, displ, veloc, accel, &
+!$OMP deltat, deltatsqover2, deltatover2 ) &
+!$OMP PRIVATE(i)
+
! Newmark time scheme update
+!$OMP DO SCHEDULE(GUIDED)
do i=1,NGLOB
displ(i) = displ(i) + deltat * veloc(i) + deltatsqover2 * accel(i)
veloc(i) = veloc(i) + deltatover2 * accel(i)
accel(i) = 0._CUSTOM_REAL
enddo
+!$OMP ENDDO
+!$OMP END PARALLEL
end subroutine update_displ_acoustic
@@ -280,9 +288,6 @@
! velocity potential
real(kind=CUSTOM_REAL), dimension(NGLOB) :: veloc_outer_core,accel_outer_core
- ! mass matrix
-!!!!! real(kind=CUSTOM_REAL), dimension(NGLOB) :: rmass_outer_core
-
real(kind=CUSTOM_REAL) :: deltatover2
! local parameters
@@ -393,17 +398,26 @@
! - inner core region
! needs both, acceleration update & velocity corrector terms
+!$OMP PARALLEL DEFAULT(NONE) &
+!$OMP SHARED( NGLOB_CM, veloc_crust_mantle, deltatover2, &
+!$OMP accel_crust_mantle, NGLOB_IC, veloc_inner_core, accel_inner_core) &
+!$OMP PRIVATE(i)
+
if(FORCE_VECTORIZATION_VAL) then
! crust/mantle
+!$OMP DO SCHEDULE(GUIDED)
do i=1,NGLOB_CM * NDIM
veloc_crust_mantle(i,1) = veloc_crust_mantle(i,1) + deltatover2*accel_crust_mantle(i,1)
enddo
+!$OMP ENDDO
+!$OMP DO SCHEDULE(GUIDED)
! inner core
do i=1,NGLOB_IC * NDIM
veloc_inner_core(i,1) = veloc_inner_core(i,1) + deltatover2*accel_inner_core(i,1)
enddo
+!$OMP ENDDO
else
@@ -419,5 +433,7 @@
endif
+!$OMP END PARALLEL
+
end subroutine update_veloc_elastic
More information about the CIG-COMMITS
mailing list