[cig-commits] [commit] devel: avoids compiler warnings and adds parameter statement to LDDRK constants; needs a workaround in compute_forces_outer_core_Dev.F90 because of a gfortran OpenMP bug (560083c)
cig_noreply at geodynamics.org
cig_noreply at geodynamics.org
Tue Nov 25 06:56:10 PST 2014
Repository : https://github.com/geodynamics/specfem3d_globe
On branch : devel
Link : https://github.com/geodynamics/specfem3d_globe/compare/0e5b55c6f30be94583639fd325373eecd6facc6d...8be3e0b0267c8d4cf5af3bc26e8903da17bc4fd1
>---------------------------------------------------------------
commit 560083c976e7e3cf090098611b659a22c28b5e80
Author: daniel peter <peterda at ethz.ch>
Date: Sun Nov 9 00:18:40 2014 +0100
avoids compiler warnings and adds parameter statement to LDDRK constants; needs a workaround in compute_forces_outer_core_Dev.F90 because of a gfortran OpenMP bug
>---------------------------------------------------------------
560083c976e7e3cf090098611b659a22c28b5e80
setup/constants.h.in | 6 +-
src/auxiliaries/combine_AVS_DX.f90 | 6 +
src/gpu/prepare_mesh_constants_gpu.c | 2 -
src/meshfem3D/setup_MPI_interfaces.f90 | 58 ++---
src/meshfem3D/test_MPI_interfaces.f90 | 19 +-
src/shared/save_header_file.F90 | 298 ++++++++++++------------
src/specfem3D/compute_forces_outer_core_Dev.F90 | 28 ++-
7 files changed, 220 insertions(+), 197 deletions(-)
diff --git a/setup/constants.h.in b/setup/constants.h.in
index 6e7bd77..2721a43 100644
--- a/setup/constants.h.in
+++ b/setup/constants.h.in
@@ -837,15 +837,15 @@
!!-----------------------------------------------------------
integer, parameter :: NSTAGE = 6
- real(kind=CUSTOM_REAL), dimension(NSTAGE) :: ALPHA_LDDRK = &
+ real(kind=CUSTOM_REAL), dimension(NSTAGE), parameter :: ALPHA_LDDRK = &
(/0.0_CUSTOM_REAL,-0.737101392796_CUSTOM_REAL, -1.634740794341_CUSTOM_REAL,&
-0.744739003780_CUSTOM_REAL,-1.469897351522_CUSTOM_REAL,-2.813971388035_CUSTOM_REAL/)
- real(kind=CUSTOM_REAL), dimension(NSTAGE) :: BETA_LDDRK = &
+ real(kind=CUSTOM_REAL), dimension(NSTAGE), parameter :: BETA_LDDRK = &
(/0.032918605146_CUSTOM_REAL,0.823256998200_CUSTOM_REAL,0.381530948900_CUSTOM_REAL,&
0.200092213184_CUSTOM_REAL,1.718581042715_CUSTOM_REAL,0.27_CUSTOM_REAL/)
- real(kind=CUSTOM_REAL), dimension(NSTAGE) :: C_LDDRK = &
+ real(kind=CUSTOM_REAL), dimension(NSTAGE), parameter :: C_LDDRK = &
(/0.0_CUSTOM_REAL,0.032918605146_CUSTOM_REAL,0.249351723343_CUSTOM_REAL,&
0.466911705055_CUSTOM_REAL,0.582030414044_CUSTOM_REAL,0.847252983783_CUSTOM_REAL/)
diff --git a/src/auxiliaries/combine_AVS_DX.f90 b/src/auxiliaries/combine_AVS_DX.f90
index a83250a..b17fdee 100644
--- a/src/auxiliaries/combine_AVS_DX.f90
+++ b/src/auxiliaries/combine_AVS_DX.f90
@@ -555,6 +555,12 @@
! from the lower-left corner, continuing to the lower-right corner and so on
do iloop_corners = 1,2
+ ipointnumber1_horiz = 0
+ ipointnumber2_horiz = 0
+
+ ipointnumber1_vert = 0
+ ipointnumber2_vert = 0
+
select case (iloop_corners)
case (1)
ipointnumber1_horiz = iglob1
diff --git a/src/gpu/prepare_mesh_constants_gpu.c b/src/gpu/prepare_mesh_constants_gpu.c
index 83b3c15..d8eecfa 100644
--- a/src/gpu/prepare_mesh_constants_gpu.c
+++ b/src/gpu/prepare_mesh_constants_gpu.c
@@ -249,7 +249,6 @@ void FC_FUNC_ (prepare_constants_device,
mp->anisotropic_kl = *ANISOTROPIC_KL_f;
mp->approximate_hess_kl = *APPROXIMATE_HESS_KL_f;
-
// mesh coloring flag
#ifdef USE_MESH_COLORING_GPU
mp->use_mesh_coloring_gpu = 1;
@@ -1895,7 +1894,6 @@ void FC_FUNC_ (prepare_outer_core_device,
// global indexing
gpuCreateCopy_todevice_int (&mp->d_ibool_outer_core, h_ibool, NGLL3 * (mp->NSPEC_OUTER_CORE));
-
// mesh locations
// always needed
diff --git a/src/meshfem3D/setup_MPI_interfaces.f90 b/src/meshfem3D/setup_MPI_interfaces.f90
index 471c629..68a740e 100644
--- a/src/meshfem3D/setup_MPI_interfaces.f90
+++ b/src/meshfem3D/setup_MPI_interfaces.f90
@@ -199,18 +199,18 @@
! stores MPI interfaces information
allocate(my_neighbours_crust_mantle(num_interfaces_crust_mantle), &
- nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
- stat=ier)
+ nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
+ stat=ier)
if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbours_crust_mantle etc.')
- my_neighbours_crust_mantle = -1
- nibool_interfaces_crust_mantle = 0
+ my_neighbours_crust_mantle(:) = -1
+ nibool_interfaces_crust_mantle(:) = 0
! copies interfaces arrays
if (num_interfaces_crust_mantle > 0) then
allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
- stat=ier)
+ stat=ier)
if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_crust_mantle')
- ibool_interfaces_crust_mantle = 0
+ ibool_interfaces_crust_mantle(:,:) = 0
! ranks of neighbour processes
my_neighbours_crust_mantle(:) = my_neighbours(1:num_interfaces_crust_mantle)
@@ -222,6 +222,7 @@
! dummy allocation (fortran90 should allow allocate statement with zero array size)
max_nibool_interfaces_cm = 0
allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)
+ if (ier /= 0 ) call exit_mpi(myrank,'Error allocating dummy array ibool_interfaces_crust_mantle')
endif
! debug: outputs MPI interface
@@ -239,9 +240,9 @@
! checks addressing
call test_MPI_neighbours(IREGION_CRUST_MANTLE, &
- num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
- my_neighbours_crust_mantle,nibool_interfaces_crust_mantle, &
- ibool_interfaces_crust_mantle)
+ num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+ my_neighbours_crust_mantle,nibool_interfaces_crust_mantle, &
+ ibool_interfaces_crust_mantle)
! checks with assembly of test fields
call test_MPI_cm()
@@ -295,19 +296,20 @@
! assembles values
call assemble_MPI_scalar_block(myrank,test_flag, &
- NGLOB_OUTER_CORE, &
- iproc_xi,iproc_eta,ichunk,addressing, &
- iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
- npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
- iboolfaces_outer_core,iboolcorner_outer_core, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
- iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
- buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
- NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
- NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
- NGLOB2DMAX_XY,NCHUNKS)
+ NGLOB_OUTER_CORE, &
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_outer_core,iboolright_xi_outer_core, &
+ iboolleft_eta_outer_core,iboolright_eta_outer_core, &
+ npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
+ iboolfaces_outer_core,iboolcorner_outer_core, &
+ iprocfrom_faces,iprocto_faces,imsg_type, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+ NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+ NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
+ NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
+ NGLOB2DMAX_XY,NCHUNKS)
! removes own myrank id (+1)
@@ -318,12 +320,12 @@
! determines neighbor rank for shared faces
call get_MPI_interfaces(myrank,NGLOB_OUTER_CORE,NSPEC_OUTER_CORE, &
- test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
- num_interfaces_outer_core,max_nibool_interfaces_oc, &
- max_nibool,MAX_NEIGHBOURS, &
- ibool,is_on_a_slice_edge, &
- IREGION_OUTER_CORE,.false.,dummy_i,INCLUDE_CENTRAL_CUBE, &
- xstore_outer_core,ystore_outer_core,zstore_outer_core,NPROCTOT)
+ test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+ num_interfaces_outer_core,max_nibool_interfaces_oc, &
+ max_nibool,MAX_NEIGHBOURS, &
+ ibool,is_on_a_slice_edge, &
+ IREGION_OUTER_CORE,.false.,dummy_i,INCLUDE_CENTRAL_CUBE, &
+ xstore_outer_core,ystore_outer_core,zstore_outer_core,NPROCTOT)
deallocate(test_flag)
deallocate(dummy_i)
diff --git a/src/meshfem3D/test_MPI_interfaces.f90 b/src/meshfem3D/test_MPI_interfaces.f90
index b500757..50b1331 100644
--- a/src/meshfem3D/test_MPI_interfaces.f90
+++ b/src/meshfem3D/test_MPI_interfaces.f90
@@ -27,9 +27,9 @@
subroutine test_MPI_neighbours(iregion_code, &
- num_interfaces,max_nibool_interfaces, &
- my_neighbours,nibool_interfaces, &
- ibool_interfaces)
+ num_interfaces,max_nibool_interfaces, &
+ my_neighbours,nibool_interfaces, &
+ ibool_interfaces)
use constants
use meshfem3D_par,only: NPROCTOT,myrank
@@ -73,14 +73,15 @@
! allocates global mask
select case (iregion_code)
case (IREGION_CRUST_MANTLE)
- allocate(mask(NGLOB_CRUST_MANTLE))
+ allocate(mask(NGLOB_CRUST_MANTLE),stat=ier)
case (IREGION_OUTER_CORE)
- allocate(mask(NGLOB_OUTER_CORE))
+ allocate(mask(NGLOB_OUTER_CORE),stat=ier)
case (IREGION_INNER_CORE)
- allocate(mask(NGLOB_INNER_CORE))
+ allocate(mask(NGLOB_INNER_CORE),stat=ier)
case default
call exit_mpi(myrank,'Error test MPI: iregion_code not recognized')
end select
+ if (ier /= 0 ) call exit_mpi(myrank,'Error allocating mask for testing mpi neighbors')
! test ibool entries
! (must be non-zero and unique)
@@ -253,7 +254,7 @@
! crust mantle
allocate(test_flag_vector(NDIM,NGLOB_CRUST_MANTLE),stat=ier)
- if (ier /= 0 ) stop 'Error allocating array test_flag etc.'
+ if (ier /= 0 ) stop 'Error allocating array test_flag crust/mantle'
allocate(valence(NGLOB_CRUST_MANTLE),stat=ier)
if (ier /= 0 ) stop 'Error allocating array valence'
@@ -369,7 +370,7 @@
! outer core
allocate(test_flag(NGLOB_OUTER_CORE),stat=ier)
- if (ier /= 0 ) stop 'Error allocating array test_flag etc.'
+ if (ier /= 0 ) stop 'Error allocating array test_flag outer core'
allocate(valence(NGLOB_OUTER_CORE),stat=ier)
if (ier /= 0 ) stop 'Error allocating array valence'
@@ -478,7 +479,7 @@
! inner core
allocate(test_flag_vector(NDIM,NGLOB_INNER_CORE),stat=ier)
- if (ier /= 0 ) stop 'Error allocating array test_flag etc.'
+ if (ier /= 0 ) stop 'Error allocating array test_flag inner core'
allocate(valence(NGLOB_INNER_CORE),stat=ier)
if (ier /= 0 ) stop 'Error allocating array valence'
diff --git a/src/shared/save_header_file.F90 b/src/shared/save_header_file.F90
index 6de7a29..b8e3484 100644
--- a/src/shared/save_header_file.F90
+++ b/src/shared/save_header_file.F90
@@ -152,68 +152,68 @@
if (PRINT_INFO_TO_SCREEN) then
- print *
- print *,'edit file OUTPUT_FILES/values_from_mesher.h to see'
- print *,'some statistics about the mesh'
- print *
-
- print *,'number of processors = ',NPROCTOT
- print *
- print *,'maximum number of points per region = ',nglob(IREGION_CRUST_MANTLE)
- print *
- print *,'total elements per slice = ',sum(NSPEC)
- print *,'total points per slice = ',sum(nglob)
- print *
- print *,'the time step of the solver will be DT = ',sngl(DT)
- print *
- if (MOVIE_SURFACE .or. MOVIE_VOLUME) then
- print *,'MOVIE_VOLUME :',MOVIE_VOLUME
- print *,'MOVIE_SURFACE:',MOVIE_SURFACE
- print *,'Saving movie frames every',NTSTEP_BETWEEN_FRAMES
print *
- endif
- print *,'on NEC SX, make sure "loopcnt=" parameter'
+ print *,'edit file OUTPUT_FILES/values_from_mesher.h to see'
+ print *,'some statistics about the mesh'
+ print *
+
+ print *,'number of processors = ',NPROCTOT
+ print *
+ print *,'maximum number of points per region = ',nglob(IREGION_CRUST_MANTLE)
+ print *
+ print *,'total elements per slice = ',sum(NSPEC)
+ print *,'total points per slice = ',sum(nglob)
+ print *
+ print *,'the time step of the solver will be DT = ',sngl(DT)
+ print *
+ if (MOVIE_SURFACE .or. MOVIE_VOLUME) then
+ print *,'MOVIE_VOLUME :',MOVIE_VOLUME
+ print *,'MOVIE_SURFACE:',MOVIE_SURFACE
+ print *,'Saving movie frames every',NTSTEP_BETWEEN_FRAMES
+ print *
+ endif
+ print *,'on NEC SX, make sure "loopcnt=" parameter'
! use fused loops on NEC SX
- print *,'in Makefile is greater than max vector length = ',nglob(IREGION_CRUST_MANTLE)*NDIM
- print *
-
- print *,'approximate static memory needed by the solver:'
- print *,'----------------------------------------------'
- print *
- print *,'(lower bound, usually the real amount used is 5% to 10% higher)'
- print *
- print *,'(you can get a more precise estimate of the size used per MPI process'
- print *,' by typing "size -d bin/xspecfem3D"'
- print *,' after compiling the code with the DATA/Par_file you plan to use)'
- print *
- print *,'size of static arrays per slice = ',static_memory_size/1.d6,' MB'
- print *,' = ',static_memory_size/1048576.d0,' MiB'
- print *,' = ',static_memory_size/1.d9,' GB'
- print *,' = ',static_memory_size/1073741824.d0,' GiB'
- print *
+ print *,'in Makefile is greater than max vector length = ',nglob(IREGION_CRUST_MANTLE)*NDIM
+ print *
+
+ print *,'approximate static memory needed by the solver:'
+ print *,'----------------------------------------------'
+ print *
+ print *,'(lower bound, usually the real amount used is 5% to 10% higher)'
+ print *
+ print *,'(you can get a more precise estimate of the size used per MPI process'
+ print *,' by typing "size -d bin/xspecfem3D"'
+ print *,' after compiling the code with the DATA/Par_file you plan to use)'
+ print *
+ print *,'size of static arrays per slice = ',static_memory_size/1.d6,' MB'
+ print *,' = ',static_memory_size/1048576.d0,' MiB'
+ print *,' = ',static_memory_size/1.d9,' GB'
+ print *,' = ',static_memory_size/1073741824.d0,' GiB'
+ print *
! note: using less memory becomes an issue only if the strong scaling of the code is poor.
! Some users will run simulations with an executable using far less than 80% RAM per core
! if they prefer having a faster computational time (and use a higher number of cores).
- print *,' (should be below 80% or 90% of the memory installed per core)'
- print *,' (if significantly more, the job will not run by lack of memory)'
- print *,' (note that if significantly less, you waste a significant amount'
- print *,' of memory per processor core)'
- print *,' (but that can be perfectly acceptable if you can afford it and'
- print *,' want faster results by using more cores)'
- print *
- if (static_memory_size*dble(NPROCTOT)/1.d6 < 10000.d0) then
- print *,'size of static arrays for all slices = ',static_memory_size*dble(NPROCTOT)/1.d6,' MB'
- print *,' = ',static_memory_size*dble(NPROCTOT)/1048576.d0,' MiB'
- print *,' = ',static_memory_size*dble(NPROCTOT)/1.d9,' GB'
- else
- print *,'size of static arrays for all slices = ',static_memory_size*dble(NPROCTOT)/1.d9,' GB'
- endif
- print *,' = ',static_memory_size*dble(NPROCTOT)/1073741824.d0,' GiB'
- print *,' = ',static_memory_size*dble(NPROCTOT)/1.d12,' TB'
- print *,' = ',static_memory_size*dble(NPROCTOT)/1099511627776.d0,' TiB'
- print *
+ print *,' (should be below 80% or 90% of the memory installed per core)'
+ print *,' (if significantly more, the job will not run by lack of memory)'
+ print *,' (note that if significantly less, you waste a significant amount'
+ print *,' of memory per processor core)'
+ print *,' (but that can be perfectly acceptable if you can afford it and'
+ print *,' want faster results by using more cores)'
+ print *
+ if (static_memory_size*dble(NPROCTOT)/1.d6 < 10000.d0) then
+ print *,'size of static arrays for all slices = ',static_memory_size*dble(NPROCTOT)/1.d6,' MB'
+ print *,' = ',static_memory_size*dble(NPROCTOT)/1048576.d0,' MiB'
+ print *,' = ',static_memory_size*dble(NPROCTOT)/1.d9,' GB'
+ else
+ print *,'size of static arrays for all slices = ',static_memory_size*dble(NPROCTOT)/1.d9,' GB'
+ endif
+ print *,' = ',static_memory_size*dble(NPROCTOT)/1073741824.d0,' GiB'
+ print *,' = ',static_memory_size*dble(NPROCTOT)/1.d12,' TB'
+ print *,' = ',static_memory_size*dble(NPROCTOT)/1099511627776.d0,' TiB'
+ print *
endif ! of if (PRINT_INFO_TO_SCREEN)
@@ -259,36 +259,36 @@
if (PRINT_INFO_TO_SCREEN) then
- print *,'without undoing of attenuation you are using ',static_memory_size_GB,' GB per core'
- print *,' i.e. ',sngl(100.d0 * static_memory_size_GB / MEMORY_INSTALLED_PER_CORE_IN_GB),'% of the installed memory'
+ print *,'without undoing of attenuation you are using ',static_memory_size_GB,' GB per core'
+ print *,' i.e. ',sngl(100.d0 * static_memory_size_GB / MEMORY_INSTALLED_PER_CORE_IN_GB),'% of the installed memory'
- print *
- print *,'each time step to store in memory to undo attenuation will require storing ', &
- size_to_store_at_each_time_step,' GB per core'
- print *
- print *,'*******************************************************************************'
- print *,'the optimal value is thus NT_DUMP_ATTENUATION = ',NT_DUMP_ATTENUATION_optimal
- print *,'*******************************************************************************'
+ print *
+ print *,'each time step to store in memory to undo attenuation will require storing ', &
+ size_to_store_at_each_time_step,' GB per core'
+ print *
+ print *,'*******************************************************************************'
+ print *,'the optimal value is thus NT_DUMP_ATTENUATION = ',NT_DUMP_ATTENUATION_optimal
+ print *,'*******************************************************************************'
- print *
- print *,'we will need to save a total of ',number_of_dumpings_to_do,' dumpings (restart files) to disk'
+ print *
+ print *,'we will need to save a total of ',number_of_dumpings_to_do,' dumpings (restart files) to disk'
- print *
- print *,'each dumping on the disk to undo attenuation will require storing ',disk_size_of_each_dumping,' GB per core'
+ print *
+ print *,'each dumping on the disk to undo attenuation will require storing ',disk_size_of_each_dumping,' GB per core'
- print *
- print *,'each dumping on the disk will require storing ',disk_size_of_each_dumping*NPROCTOT,' GB for all cores'
+ print *
+ print *,'each dumping on the disk will require storing ',disk_size_of_each_dumping*NPROCTOT,' GB for all cores'
- print *
- print *,'ALL dumpings on the disk will require storing ',disk_size_of_each_dumping*number_of_dumpings_to_do,' GB per core'
+ print *
+ print *,'ALL dumpings on the disk will require storing ',disk_size_of_each_dumping*number_of_dumpings_to_do,' GB per core'
- print *
- print *,'*******************************************************************************'
- print *,'ALL dumpings on the disk will require storing ', &
- disk_size_of_each_dumping*number_of_dumpings_to_do*NPROCTOT,' GB for all cores'
- print *,' i.e. ',disk_size_of_each_dumping*number_of_dumpings_to_do*NPROCTOT/1000.d0,' TB'
- print *,'*******************************************************************************'
- print *
+ print *
+ print *,'*******************************************************************************'
+ print *,'ALL dumpings on the disk will require storing ', &
+ disk_size_of_each_dumping*number_of_dumpings_to_do*NPROCTOT,' GB for all cores'
+ print *,' i.e. ',disk_size_of_each_dumping*number_of_dumpings_to_do*NPROCTOT/1000.d0,' TB'
+ print *,'*******************************************************************************'
+ print *
endif
@@ -356,102 +356,102 @@
- 3.d0*subtract_central_cube_points
write(IOUT,*) '!'
-! display location of chunk if regional run
- if (NCHUNKS /= 6) then
-
- write(IOUT,*) '! position of the mesh chunk at the surface:'
- write(IOUT,*) '! -----------------------------------------'
- write(IOUT,*) '!'
- write(IOUT,*) '! angular size in first direction in degrees = ',sngl(ANGULAR_WIDTH_XI_IN_DEGREES)
- write(IOUT,*) '! angular size in second direction in degrees = ',sngl(ANGULAR_WIDTH_ETA_IN_DEGREES)
- write(IOUT,*) '!'
- write(IOUT,*) '! longitude of center in degrees = ',sngl(CENTER_LONGITUDE_IN_DEGREES)
- write(IOUT,*) '! latitude of center in degrees = ',sngl(CENTER_LATITUDE_IN_DEGREES)
- write(IOUT,*) '!'
- write(IOUT,*) '! angle of rotation of the first chunk = ',sngl(GAMMA_ROTATION_AZIMUTH)
-
! convert width to radians
ANGULAR_WIDTH_XI_RAD = ANGULAR_WIDTH_XI_IN_DEGREES * DEGREES_TO_RADIANS
ANGULAR_WIDTH_ETA_RAD = ANGULAR_WIDTH_ETA_IN_DEGREES * DEGREES_TO_RADIANS
+! display location of chunk if regional run
+ if (NCHUNKS /= 6) then
+
+ write(IOUT,*) '! position of the mesh chunk at the surface:'
+ write(IOUT,*) '! -----------------------------------------'
+ write(IOUT,*) '!'
+ write(IOUT,*) '! angular size in first direction in degrees = ',sngl(ANGULAR_WIDTH_XI_IN_DEGREES)
+ write(IOUT,*) '! angular size in second direction in degrees = ',sngl(ANGULAR_WIDTH_ETA_IN_DEGREES)
+ write(IOUT,*) '!'
+ write(IOUT,*) '! longitude of center in degrees = ',sngl(CENTER_LONGITUDE_IN_DEGREES)
+ write(IOUT,*) '! latitude of center in degrees = ',sngl(CENTER_LATITUDE_IN_DEGREES)
+ write(IOUT,*) '!'
+ write(IOUT,*) '! angle of rotation of the first chunk = ',sngl(GAMMA_ROTATION_AZIMUTH)
+
! compute rotation matrix from Euler angles
- call euler_angles(rotation_matrix,CENTER_LONGITUDE_IN_DEGREES,CENTER_LATITUDE_IN_DEGREES,GAMMA_ROTATION_AZIMUTH)
+ call euler_angles(rotation_matrix,CENTER_LONGITUDE_IN_DEGREES,CENTER_LATITUDE_IN_DEGREES,GAMMA_ROTATION_AZIMUTH)
! loop on the four corners of the chunk to display their coordinates
- icorner = 0
- do iy = 0,1
- do ix = 0,1
-
- icorner = icorner + 1
-
- xi= - ANGULAR_WIDTH_XI_RAD/2. + dble(ix)*ANGULAR_WIDTH_XI_RAD
- eta= - ANGULAR_WIDTH_ETA_RAD/2. + dble(iy)*ANGULAR_WIDTH_ETA_RAD
-
- x=dtan(xi)
- y=dtan(eta)
-
- gamma=ONE/dsqrt(ONE+x*x+y*y)
- rgt=R_UNIT_SPHERE*gamma
-
- ! define the mesh points at the top surface
- x_top=-y*rgt
- y_top=x*rgt
- z_top=rgt
-
- ! rotate top
- vector_ori(1) = x_top
- vector_ori(2) = y_top
- vector_ori(3) = z_top
- do i=1,3
- vector_rotated(i)=0.0d0
- do j=1,3
- vector_rotated(i)=vector_rotated(i)+rotation_matrix(i,j)*vector_ori(j)
+ icorner = 0
+ do iy = 0,1
+ do ix = 0,1
+
+ icorner = icorner + 1
+
+ xi= - ANGULAR_WIDTH_XI_RAD/2. + dble(ix)*ANGULAR_WIDTH_XI_RAD
+ eta= - ANGULAR_WIDTH_ETA_RAD/2. + dble(iy)*ANGULAR_WIDTH_ETA_RAD
+
+ x=dtan(xi)
+ y=dtan(eta)
+
+ gamma=ONE/dsqrt(ONE+x*x+y*y)
+ rgt=R_UNIT_SPHERE*gamma
+
+ ! define the mesh points at the top surface
+ x_top=-y*rgt
+ y_top=x*rgt
+ z_top=rgt
+
+ ! rotate top
+ vector_ori(1) = x_top
+ vector_ori(2) = y_top
+ vector_ori(3) = z_top
+ do i=1,3
+ vector_rotated(i)=0.0d0
+ do j=1,3
+ vector_rotated(i)=vector_rotated(i)+rotation_matrix(i,j)*vector_ori(j)
+ enddo
enddo
- enddo
- x_top = vector_rotated(1)
- y_top = vector_rotated(2)
- z_top = vector_rotated(3)
+ x_top = vector_rotated(1)
+ y_top = vector_rotated(2)
+ z_top = vector_rotated(3)
- ! convert to latitude and longitude
- call xyz_2_rthetaphi_dble(x_top,y_top,z_top,r_corner,theta_corner,phi_corner)
- call reduce(theta_corner,phi_corner)
+ ! convert to latitude and longitude
+ call xyz_2_rthetaphi_dble(x_top,y_top,z_top,r_corner,theta_corner,phi_corner)
+ call reduce(theta_corner,phi_corner)
- ! convert geocentric to geographic colatitude
- call geocentric_2_geographic_dble(theta_corner,colat_corner)
+ ! convert geocentric to geographic colatitude
+ call geocentric_2_geographic_dble(theta_corner,colat_corner)
- if (phi_corner>PI) phi_corner=phi_corner-TWO_PI
+ if (phi_corner>PI) phi_corner=phi_corner-TWO_PI
- ! compute real position of the source
- lat = (PI_OVER_TWO-colat_corner)*RADIANS_TO_DEGREES
- long = phi_corner*RADIANS_TO_DEGREES
+ ! compute real position of the source
+ lat = (PI_OVER_TWO-colat_corner)*RADIANS_TO_DEGREES
+ long = phi_corner*RADIANS_TO_DEGREES
- write(IOUT,*) '!'
- write(IOUT,*) '! corner ',icorner
- write(IOUT,*) '! longitude in degrees = ',long
- write(IOUT,*) '! latitude in degrees = ',lat
+ write(IOUT,*) '!'
+ write(IOUT,*) '! corner ',icorner
+ write(IOUT,*) '! longitude in degrees = ',long
+ write(IOUT,*) '! latitude in degrees = ',lat
+ enddo
enddo
- enddo
- write(IOUT,*) '!'
+ write(IOUT,*) '!'
endif ! regional chunk
! mesh averages
- if (NCHUNKS == 6 ) then
- ! global mesh, chunks of 90 degrees
- num_elem_gc = 4 * NEX_XI
- num_gll_gc = 4*NEX_XI*(NGLLX-1)
- avg_dist_deg = 360.d0 / dble(4) / dble(NEX_XI*(NGLLX-1))
- avg_dist_km = TWO_PI / dble(4) * R_EARTH_KM / dble(NEX_XI*(NGLLX-1))
- avg_element_size = TWO_PI / dble(4) * R_EARTH_KM / dble(NEX_XI)
- else
+ if (NCHUNKS /= 6 ) then
! regional mesh, variable chunk sizes
num_elem_gc = int( 90.d0 / ANGULAR_WIDTH_XI_IN_DEGREES * 4 * NEX_XI )
num_gll_gc = int( 90.d0 / ANGULAR_WIDTH_XI_IN_DEGREES * 4 * NEX_XI *(NGLLX-1) )
avg_dist_deg = max( ANGULAR_WIDTH_XI_RAD/NEX_XI,ANGULAR_WIDTH_ETA_RAD/NEX_ETA ) / dble(NGLLX-1)
avg_dist_km = max( ANGULAR_WIDTH_XI_RAD/NEX_XI,ANGULAR_WIDTH_ETA_RAD/NEX_ETA ) * R_EARTH_KM / dble(NGLLX-1)
avg_element_size = max( ANGULAR_WIDTH_XI_RAD/NEX_XI,ANGULAR_WIDTH_ETA_RAD/NEX_ETA ) * R_EARTH_KM
+ else
+ ! global mesh, chunks of 90 degrees
+ num_elem_gc = 4 * NEX_XI
+ num_gll_gc = 4*NEX_XI*(NGLLX-1)
+ avg_dist_deg = 360.d0 / dble(4) / dble(NEX_XI*(NGLLX-1))
+ avg_dist_km = TWO_PI / dble(4) * R_EARTH_KM / dble(NEX_XI*(NGLLX-1))
+ avg_element_size = TWO_PI / dble(4) * R_EARTH_KM / dble(NEX_XI)
endif
write(IOUT,*) '! resolution of the mesh at the surface:'
diff --git a/src/specfem3D/compute_forces_outer_core_Dev.F90 b/src/specfem3D/compute_forces_outer_core_Dev.F90
index 1edcdbb..c067995 100644
--- a/src/specfem3D/compute_forces_outer_core_Dev.F90
+++ b/src/specfem3D/compute_forces_outer_core_Dev.F90
@@ -119,6 +119,7 @@
#else
integer :: i,j,k
#endif
+ real(kind=CUSTOM_REAL), dimension(NSTAGE) :: MYALPHA_LDDRK,MYBETA_LDDRK
! ****************************************************
! big loop over all spectral elements in the fluid
@@ -137,6 +138,20 @@
num_elements = nspec_inner
endif
+ ! note: this is an OpenMP work-around for gfortran
+ ! gfortran has problems with parameters inside a DEFAULT(NONE) OpenMP block
+ ! for a fix see https://gcc.gnu.org/ml/fortran/2014-10/msg00064.html
+ !
+ ! we use local variables to circumvent it, another possibility would be to use DEFAULT(SHARED)
+ !
+ ! FIRSTPRIVATE declaration is chosen based on suggestion for performance optimization:
+ ! see https://docs.oracle.com/cd/E19059-01/stud.10/819-0501/7_tuning.html
+
+ if (USE_LDDRK) then
+ MYALPHA_LDDRK(:) = ALPHA_LDDRK(:)
+ MYBETA_LDDRK(:) = BETA_LDDRK(:)
+ endif
+
!$OMP PARALLEL DEFAULT(NONE) &
!$OMP SHARED( &
!$OMP num_elements, phase_ispec_inner, iphase, ibool, displfluid, xstore, ystore, zstore, &
@@ -144,7 +159,7 @@
!$OMP gammax, gammay, gammaz, deltat, two_omega_earth, timeval, A_array_rotation, B_array_rotation, &
!$OMP minus_rho_g_over_kappa_fluid, wgll_cube, MOVIE_VOLUME, hprimewgll_xxT, hprimewgll_xx, &
!$OMP wgllwgll_yz_3D, wgllwgll_xz_3D, wgllwgll_xy_3D, accelfluid, USE_LDDRK, A_array_rotation_lddrk, &
-!$OMP istage, B_array_rotation_lddrk, div_displfluid, ALPHA_LDDRK, BETA_LDDRK ) &
+!$OMP istage, B_array_rotation_lddrk, div_displfluid ) &
!$OMP PRIVATE( &
!$OMP ispec_p, ispec, iglob, dummyx_loc, radius, theta, phi, &
!$OMP cos_theta, sin_theta, cos_phi, sin_phi, int_radius, &
@@ -154,7 +169,8 @@
!$OMP dpotentialdxl, tempx1, tempx3, dpotentialdyl, dpotentialdzl, two_omega_deltat, cos_two_omega_t, &
!$OMP sin_two_omega_t, source_euler_A, source_euler_B, A_rotation, B_rotation, ux_rotation, uy_rotation, &
!$OMP dpotentialdx_with_rot, dpotentialdy_with_rot, gxl, gyl, gzl, gravity_term, &
-!$OMP sum_terms, newtempx1, newtempx3, newtempx2 )
+!$OMP sum_terms, newtempx1, newtempx3, newtempx2 ) &
+!$OMP FIRSTPRIVATE( MYALPHA_LDDRK,MYBETA_LDDRK )
!$OMP DO SCHEDULE(GUIDED)
do ispec_p = 1,num_elements
@@ -422,15 +438,15 @@
! use the source saved above
DO_LOOP_IJK
- A_array_rotation_lddrk(INDEX_IJK,ispec) = ALPHA_LDDRK(istage) * A_array_rotation_lddrk(INDEX_IJK,ispec) &
+ A_array_rotation_lddrk(INDEX_IJK,ispec) = MYALPHA_LDDRK(istage) * A_array_rotation_lddrk(INDEX_IJK,ispec) &
+ source_euler_A(INDEX_IJK)
A_array_rotation(INDEX_IJK,ispec) = A_array_rotation(INDEX_IJK,ispec) &
- + BETA_LDDRK(istage) * A_array_rotation_lddrk(INDEX_IJK,ispec)
+ + MYBETA_LDDRK(istage) * A_array_rotation_lddrk(INDEX_IJK,ispec)
- B_array_rotation_lddrk(INDEX_IJK,ispec) = ALPHA_LDDRK(istage) * B_array_rotation_lddrk(INDEX_IJK,ispec) &
+ B_array_rotation_lddrk(INDEX_IJK,ispec) = MYALPHA_LDDRK(istage) * B_array_rotation_lddrk(INDEX_IJK,ispec) &
+ source_euler_B(INDEX_IJK)
B_array_rotation(INDEX_IJK,ispec) = B_array_rotation(INDEX_IJK,ispec) &
- + BETA_LDDRK(istage) * B_array_rotation_lddrk(INDEX_IJK,ispec)
+ + MYBETA_LDDRK(istage) * B_array_rotation_lddrk(INDEX_IJK,ispec)
ENDDO_LOOP_IJK
More information about the CIG-COMMITS
mailing list