[cig-commits] [commit] devel: avoids compiler warnings and adds parameter statement to LDDRK constants; needs a workaround in compute_forces_outer_core_Dev.F90 because of a gfortran OpenMP bug (560083c)

cig_noreply at geodynamics.org cig_noreply at geodynamics.org
Tue Nov 25 06:56:10 PST 2014


Repository : https://github.com/geodynamics/specfem3d_globe

On branch  : devel
Link       : https://github.com/geodynamics/specfem3d_globe/compare/0e5b55c6f30be94583639fd325373eecd6facc6d...8be3e0b0267c8d4cf5af3bc26e8903da17bc4fd1

>---------------------------------------------------------------

commit 560083c976e7e3cf090098611b659a22c28b5e80
Author: daniel peter <peterda at ethz.ch>
Date:   Sun Nov 9 00:18:40 2014 +0100

    avoids compiler warnings and adds parameter statement to LDDRK constants; needs a workaround in compute_forces_outer_core_Dev.F90 because of a gfortran OpenMP bug


>---------------------------------------------------------------

560083c976e7e3cf090098611b659a22c28b5e80
 setup/constants.h.in                            |   6 +-
 src/auxiliaries/combine_AVS_DX.f90              |   6 +
 src/gpu/prepare_mesh_constants_gpu.c            |   2 -
 src/meshfem3D/setup_MPI_interfaces.f90          |  58 ++---
 src/meshfem3D/test_MPI_interfaces.f90           |  19 +-
 src/shared/save_header_file.F90                 | 298 ++++++++++++------------
 src/specfem3D/compute_forces_outer_core_Dev.F90 |  28 ++-
 7 files changed, 220 insertions(+), 197 deletions(-)

diff --git a/setup/constants.h.in b/setup/constants.h.in
index 6e7bd77..2721a43 100644
--- a/setup/constants.h.in
+++ b/setup/constants.h.in
@@ -837,15 +837,15 @@
 !!-----------------------------------------------------------
   integer, parameter :: NSTAGE = 6
 
-  real(kind=CUSTOM_REAL), dimension(NSTAGE) :: ALPHA_LDDRK = &
+  real(kind=CUSTOM_REAL), dimension(NSTAGE), parameter :: ALPHA_LDDRK = &
     (/0.0_CUSTOM_REAL,-0.737101392796_CUSTOM_REAL, -1.634740794341_CUSTOM_REAL,&
       -0.744739003780_CUSTOM_REAL,-1.469897351522_CUSTOM_REAL,-2.813971388035_CUSTOM_REAL/)
 
-  real(kind=CUSTOM_REAL), dimension(NSTAGE) :: BETA_LDDRK = &
+  real(kind=CUSTOM_REAL), dimension(NSTAGE), parameter :: BETA_LDDRK = &
     (/0.032918605146_CUSTOM_REAL,0.823256998200_CUSTOM_REAL,0.381530948900_CUSTOM_REAL,&
       0.200092213184_CUSTOM_REAL,1.718581042715_CUSTOM_REAL,0.27_CUSTOM_REAL/)
 
-  real(kind=CUSTOM_REAL), dimension(NSTAGE) :: C_LDDRK = &
+  real(kind=CUSTOM_REAL), dimension(NSTAGE), parameter :: C_LDDRK = &
     (/0.0_CUSTOM_REAL,0.032918605146_CUSTOM_REAL,0.249351723343_CUSTOM_REAL,&
       0.466911705055_CUSTOM_REAL,0.582030414044_CUSTOM_REAL,0.847252983783_CUSTOM_REAL/)
 
diff --git a/src/auxiliaries/combine_AVS_DX.f90 b/src/auxiliaries/combine_AVS_DX.f90
index a83250a..b17fdee 100644
--- a/src/auxiliaries/combine_AVS_DX.f90
+++ b/src/auxiliaries/combine_AVS_DX.f90
@@ -555,6 +555,12 @@
 ! from the lower-left corner, continuing to the lower-right corner and so on
     do iloop_corners = 1,2
 
+    ipointnumber1_horiz = 0
+    ipointnumber2_horiz = 0
+
+    ipointnumber1_vert = 0
+    ipointnumber2_vert = 0
+
     select case (iloop_corners)
     case (1)
       ipointnumber1_horiz = iglob1
diff --git a/src/gpu/prepare_mesh_constants_gpu.c b/src/gpu/prepare_mesh_constants_gpu.c
index 83b3c15..d8eecfa 100644
--- a/src/gpu/prepare_mesh_constants_gpu.c
+++ b/src/gpu/prepare_mesh_constants_gpu.c
@@ -249,7 +249,6 @@ void FC_FUNC_ (prepare_constants_device,
   mp->anisotropic_kl = *ANISOTROPIC_KL_f;
   mp->approximate_hess_kl = *APPROXIMATE_HESS_KL_f;
 
-
   // mesh coloring flag
 #ifdef USE_MESH_COLORING_GPU
   mp->use_mesh_coloring_gpu = 1;
@@ -1895,7 +1894,6 @@ void FC_FUNC_ (prepare_outer_core_device,
   // global indexing
   gpuCreateCopy_todevice_int (&mp->d_ibool_outer_core, h_ibool, NGLL3 * (mp->NSPEC_OUTER_CORE));
 
-
   // mesh locations
 
   // always needed
diff --git a/src/meshfem3D/setup_MPI_interfaces.f90 b/src/meshfem3D/setup_MPI_interfaces.f90
index 471c629..68a740e 100644
--- a/src/meshfem3D/setup_MPI_interfaces.f90
+++ b/src/meshfem3D/setup_MPI_interfaces.f90
@@ -199,18 +199,18 @@
 
   ! stores MPI interfaces information
   allocate(my_neighbours_crust_mantle(num_interfaces_crust_mantle), &
-          nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
-          stat=ier)
+           nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
+           stat=ier)
   if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array my_neighbours_crust_mantle etc.')
-  my_neighbours_crust_mantle = -1
-  nibool_interfaces_crust_mantle = 0
+  my_neighbours_crust_mantle(:) = -1
+  nibool_interfaces_crust_mantle(:) = 0
 
   ! copies interfaces arrays
   if (num_interfaces_crust_mantle > 0) then
     allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_cm,num_interfaces_crust_mantle), &
-           stat=ier)
+             stat=ier)
     if (ier /= 0 ) call exit_mpi(myrank,'Error allocating array ibool_interfaces_crust_mantle')
-    ibool_interfaces_crust_mantle = 0
+    ibool_interfaces_crust_mantle(:,:) = 0
 
     ! ranks of neighbour processes
     my_neighbours_crust_mantle(:) = my_neighbours(1:num_interfaces_crust_mantle)
@@ -222,6 +222,7 @@
     ! dummy allocation (fortran90 should allow allocate statement with zero array size)
     max_nibool_interfaces_cm = 0
     allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)
+    if (ier /= 0 ) call exit_mpi(myrank,'Error allocating dummy array ibool_interfaces_crust_mantle')
   endif
 
   ! debug: outputs MPI interface
@@ -239,9 +240,9 @@
 
   ! checks addressing
   call test_MPI_neighbours(IREGION_CRUST_MANTLE, &
-                              num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
-                              my_neighbours_crust_mantle,nibool_interfaces_crust_mantle, &
-                              ibool_interfaces_crust_mantle)
+                           num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+                           my_neighbours_crust_mantle,nibool_interfaces_crust_mantle, &
+                           ibool_interfaces_crust_mantle)
 
   ! checks with assembly of test fields
   call test_MPI_cm()
@@ -295,19 +296,20 @@
 
   ! assembles values
   call assemble_MPI_scalar_block(myrank,test_flag, &
-            NGLOB_OUTER_CORE, &
-            iproc_xi,iproc_eta,ichunk,addressing, &
-            iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
-            npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
-            iboolfaces_outer_core,iboolcorner_outer_core, &
-            iprocfrom_faces,iprocto_faces,imsg_type, &
-            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
-            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
-            NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
-            NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
-            NGLOB2DMAX_XY,NCHUNKS)
+                                 NGLOB_OUTER_CORE, &
+                                 iproc_xi,iproc_eta,ichunk,addressing, &
+                                 iboolleft_xi_outer_core,iboolright_xi_outer_core, &
+                                 iboolleft_eta_outer_core,iboolright_eta_outer_core, &
+                                 npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
+                                 iboolfaces_outer_core,iboolcorner_outer_core, &
+                                 iprocfrom_faces,iprocto_faces,imsg_type, &
+                                 iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+                                 buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+                                 buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+                                 NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+                                 NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
+                                 NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
+                                 NGLOB2DMAX_XY,NCHUNKS)
 
 
   ! removes own myrank id (+1)
@@ -318,12 +320,12 @@
 
   ! determines neighbor rank for shared faces
   call get_MPI_interfaces(myrank,NGLOB_OUTER_CORE,NSPEC_OUTER_CORE, &
-                            test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
-                            num_interfaces_outer_core,max_nibool_interfaces_oc, &
-                            max_nibool,MAX_NEIGHBOURS, &
-                            ibool,is_on_a_slice_edge, &
-                            IREGION_OUTER_CORE,.false.,dummy_i,INCLUDE_CENTRAL_CUBE, &
-                            xstore_outer_core,ystore_outer_core,zstore_outer_core,NPROCTOT)
+                          test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+                          num_interfaces_outer_core,max_nibool_interfaces_oc, &
+                          max_nibool,MAX_NEIGHBOURS, &
+                          ibool,is_on_a_slice_edge, &
+                          IREGION_OUTER_CORE,.false.,dummy_i,INCLUDE_CENTRAL_CUBE, &
+                          xstore_outer_core,ystore_outer_core,zstore_outer_core,NPROCTOT)
 
   deallocate(test_flag)
   deallocate(dummy_i)
diff --git a/src/meshfem3D/test_MPI_interfaces.f90 b/src/meshfem3D/test_MPI_interfaces.f90
index b500757..50b1331 100644
--- a/src/meshfem3D/test_MPI_interfaces.f90
+++ b/src/meshfem3D/test_MPI_interfaces.f90
@@ -27,9 +27,9 @@
 
 
   subroutine test_MPI_neighbours(iregion_code, &
-                                     num_interfaces,max_nibool_interfaces, &
-                                     my_neighbours,nibool_interfaces, &
-                                     ibool_interfaces)
+                                 num_interfaces,max_nibool_interfaces, &
+                                 my_neighbours,nibool_interfaces, &
+                                 ibool_interfaces)
 
   use constants
   use meshfem3D_par,only: NPROCTOT,myrank
@@ -73,14 +73,15 @@
   ! allocates global mask
   select case (iregion_code)
   case (IREGION_CRUST_MANTLE)
-    allocate(mask(NGLOB_CRUST_MANTLE))
+    allocate(mask(NGLOB_CRUST_MANTLE),stat=ier)
   case (IREGION_OUTER_CORE)
-    allocate(mask(NGLOB_OUTER_CORE))
+    allocate(mask(NGLOB_OUTER_CORE),stat=ier)
   case (IREGION_INNER_CORE)
-    allocate(mask(NGLOB_INNER_CORE))
+    allocate(mask(NGLOB_INNER_CORE),stat=ier)
   case default
     call exit_mpi(myrank,'Error test MPI: iregion_code not recognized')
   end select
+  if (ier /= 0 ) call exit_mpi(myrank,'Error allocating mask for testing mpi neighbors')
 
   ! test ibool entries
   ! (must be non-zero and unique)
@@ -253,7 +254,7 @@
 
   ! crust mantle
   allocate(test_flag_vector(NDIM,NGLOB_CRUST_MANTLE),stat=ier)
-  if (ier /= 0 ) stop 'Error allocating array test_flag etc.'
+  if (ier /= 0 ) stop 'Error allocating array test_flag crust/mantle'
   allocate(valence(NGLOB_CRUST_MANTLE),stat=ier)
   if (ier /= 0 ) stop 'Error allocating array valence'
 
@@ -369,7 +370,7 @@
 
   ! outer core
   allocate(test_flag(NGLOB_OUTER_CORE),stat=ier)
-  if (ier /= 0 ) stop 'Error allocating array test_flag etc.'
+  if (ier /= 0 ) stop 'Error allocating array test_flag outer core'
   allocate(valence(NGLOB_OUTER_CORE),stat=ier)
   if (ier /= 0 ) stop 'Error allocating array valence'
 
@@ -478,7 +479,7 @@
 
   ! inner core
   allocate(test_flag_vector(NDIM,NGLOB_INNER_CORE),stat=ier)
-  if (ier /= 0 ) stop 'Error allocating array test_flag etc.'
+  if (ier /= 0 ) stop 'Error allocating array test_flag inner core'
   allocate(valence(NGLOB_INNER_CORE),stat=ier)
   if (ier /= 0 ) stop 'Error allocating array valence'
 
diff --git a/src/shared/save_header_file.F90 b/src/shared/save_header_file.F90
index 6de7a29..b8e3484 100644
--- a/src/shared/save_header_file.F90
+++ b/src/shared/save_header_file.F90
@@ -152,68 +152,68 @@
 
   if (PRINT_INFO_TO_SCREEN) then
 
-  print *
-  print *,'edit file OUTPUT_FILES/values_from_mesher.h to see'
-  print *,'some statistics about the mesh'
-  print *
-
-  print *,'number of processors = ',NPROCTOT
-  print *
-  print *,'maximum number of points per region = ',nglob(IREGION_CRUST_MANTLE)
-  print *
-  print *,'total elements per slice = ',sum(NSPEC)
-  print *,'total points per slice = ',sum(nglob)
-  print *
-  print *,'the time step of the solver will be DT = ',sngl(DT)
-  print *
-  if (MOVIE_SURFACE .or. MOVIE_VOLUME) then
-    print *,'MOVIE_VOLUME :',MOVIE_VOLUME
-    print *,'MOVIE_SURFACE:',MOVIE_SURFACE
-    print *,'Saving movie frames every',NTSTEP_BETWEEN_FRAMES
     print *
-  endif
-  print *,'on NEC SX, make sure "loopcnt=" parameter'
+    print *,'edit file OUTPUT_FILES/values_from_mesher.h to see'
+    print *,'some statistics about the mesh'
+    print *
+
+    print *,'number of processors = ',NPROCTOT
+    print *
+    print *,'maximum number of points per region = ',nglob(IREGION_CRUST_MANTLE)
+    print *
+    print *,'total elements per slice = ',sum(NSPEC)
+    print *,'total points per slice = ',sum(nglob)
+    print *
+    print *,'the time step of the solver will be DT = ',sngl(DT)
+    print *
+    if (MOVIE_SURFACE .or. MOVIE_VOLUME) then
+      print *,'MOVIE_VOLUME :',MOVIE_VOLUME
+      print *,'MOVIE_SURFACE:',MOVIE_SURFACE
+      print *,'Saving movie frames every',NTSTEP_BETWEEN_FRAMES
+      print *
+    endif
+    print *,'on NEC SX, make sure "loopcnt=" parameter'
 ! use fused loops on NEC SX
-  print *,'in Makefile is greater than max vector length = ',nglob(IREGION_CRUST_MANTLE)*NDIM
-  print *
-
-  print *,'approximate static memory needed by the solver:'
-  print *,'----------------------------------------------'
-  print *
-  print *,'(lower bound, usually the real amount used is 5% to 10% higher)'
-  print *
-  print *,'(you can get a more precise estimate of the size used per MPI process'
-  print *,' by typing "size -d bin/xspecfem3D"'
-  print *,' after compiling the code with the DATA/Par_file you plan to use)'
-  print *
-  print *,'size of static arrays per slice = ',static_memory_size/1.d6,' MB'
-  print *,'                                = ',static_memory_size/1048576.d0,' MiB'
-  print *,'                                = ',static_memory_size/1.d9,' GB'
-  print *,'                                = ',static_memory_size/1073741824.d0,' GiB'
-  print *
+    print *,'in Makefile is greater than max vector length = ',nglob(IREGION_CRUST_MANTLE)*NDIM
+    print *
+
+    print *,'approximate static memory needed by the solver:'
+    print *,'----------------------------------------------'
+    print *
+    print *,'(lower bound, usually the real amount used is 5% to 10% higher)'
+    print *
+    print *,'(you can get a more precise estimate of the size used per MPI process'
+    print *,' by typing "size -d bin/xspecfem3D"'
+    print *,' after compiling the code with the DATA/Par_file you plan to use)'
+    print *
+    print *,'size of static arrays per slice = ',static_memory_size/1.d6,' MB'
+    print *,'                                = ',static_memory_size/1048576.d0,' MiB'
+    print *,'                                = ',static_memory_size/1.d9,' GB'
+    print *,'                                = ',static_memory_size/1073741824.d0,' GiB'
+    print *
 
 ! note: using less memory becomes an issue only if the strong scaling of the code is poor.
 !          Some users will run simulations with an executable using far less than 80% RAM per core
 !          if they prefer having a faster computational time (and use a higher number of cores).
 
-  print *,'   (should be below 80% or 90% of the memory installed per core)'
-  print *,'   (if significantly more, the job will not run by lack of memory)'
-  print *,'   (note that if significantly less, you waste a significant amount'
-  print *,'    of memory per processor core)'
-  print *,'   (but that can be perfectly acceptable if you can afford it and'
-  print *,'    want faster results by using more cores)'
-  print *
-  if (static_memory_size*dble(NPROCTOT)/1.d6 < 10000.d0) then
-    print *,'size of static arrays for all slices = ',static_memory_size*dble(NPROCTOT)/1.d6,' MB'
-    print *,'                                     = ',static_memory_size*dble(NPROCTOT)/1048576.d0,' MiB'
-    print *,'                                     = ',static_memory_size*dble(NPROCTOT)/1.d9,' GB'
-  else
-    print *,'size of static arrays for all slices = ',static_memory_size*dble(NPROCTOT)/1.d9,' GB'
-  endif
-  print *,'                                     = ',static_memory_size*dble(NPROCTOT)/1073741824.d0,' GiB'
-  print *,'                                     = ',static_memory_size*dble(NPROCTOT)/1.d12,' TB'
-  print *,'                                     = ',static_memory_size*dble(NPROCTOT)/1099511627776.d0,' TiB'
-  print *
+    print *,'   (should be below 80% or 90% of the memory installed per core)'
+    print *,'   (if significantly more, the job will not run by lack of memory)'
+    print *,'   (note that if significantly less, you waste a significant amount'
+    print *,'    of memory per processor core)'
+    print *,'   (but that can be perfectly acceptable if you can afford it and'
+    print *,'    want faster results by using more cores)'
+    print *
+    if (static_memory_size*dble(NPROCTOT)/1.d6 < 10000.d0) then
+      print *,'size of static arrays for all slices = ',static_memory_size*dble(NPROCTOT)/1.d6,' MB'
+      print *,'                                     = ',static_memory_size*dble(NPROCTOT)/1048576.d0,' MiB'
+      print *,'                                     = ',static_memory_size*dble(NPROCTOT)/1.d9,' GB'
+    else
+      print *,'size of static arrays for all slices = ',static_memory_size*dble(NPROCTOT)/1.d9,' GB'
+    endif
+    print *,'                                     = ',static_memory_size*dble(NPROCTOT)/1073741824.d0,' GiB'
+    print *,'                                     = ',static_memory_size*dble(NPROCTOT)/1.d12,' TB'
+    print *,'                                     = ',static_memory_size*dble(NPROCTOT)/1099511627776.d0,' TiB'
+    print *
 
   endif ! of if (PRINT_INFO_TO_SCREEN)
 
@@ -259,36 +259,36 @@
 
     if (PRINT_INFO_TO_SCREEN) then
 
-    print *,'without undoing of attenuation you are using ',static_memory_size_GB,' GB per core'
-    print *,'  i.e. ',sngl(100.d0 * static_memory_size_GB / MEMORY_INSTALLED_PER_CORE_IN_GB),'% of the installed memory'
+      print *,'without undoing of attenuation you are using ',static_memory_size_GB,' GB per core'
+      print *,'  i.e. ',sngl(100.d0 * static_memory_size_GB / MEMORY_INSTALLED_PER_CORE_IN_GB),'% of the installed memory'
 
-    print *
-    print *,'each time step to store in memory to undo attenuation will require storing ', &
-                  size_to_store_at_each_time_step,' GB per core'
-    print *
-    print *,'*******************************************************************************'
-    print *,'the optimal value is thus NT_DUMP_ATTENUATION = ',NT_DUMP_ATTENUATION_optimal
-    print *,'*******************************************************************************'
+      print *
+      print *,'each time step to store in memory to undo attenuation will require storing ', &
+                    size_to_store_at_each_time_step,' GB per core'
+      print *
+      print *,'*******************************************************************************'
+      print *,'the optimal value is thus NT_DUMP_ATTENUATION = ',NT_DUMP_ATTENUATION_optimal
+      print *,'*******************************************************************************'
 
-    print *
-    print *,'we will need to save a total of ',number_of_dumpings_to_do,' dumpings (restart files) to disk'
+      print *
+      print *,'we will need to save a total of ',number_of_dumpings_to_do,' dumpings (restart files) to disk'
 
-    print *
-    print *,'each dumping on the disk to undo attenuation will require storing ',disk_size_of_each_dumping,' GB per core'
+      print *
+      print *,'each dumping on the disk to undo attenuation will require storing ',disk_size_of_each_dumping,' GB per core'
 
-    print *
-    print *,'each dumping on the disk will require storing ',disk_size_of_each_dumping*NPROCTOT,' GB for all cores'
+      print *
+      print *,'each dumping on the disk will require storing ',disk_size_of_each_dumping*NPROCTOT,' GB for all cores'
 
-    print *
-    print *,'ALL dumpings on the disk will require storing ',disk_size_of_each_dumping*number_of_dumpings_to_do,' GB per core'
+      print *
+      print *,'ALL dumpings on the disk will require storing ',disk_size_of_each_dumping*number_of_dumpings_to_do,' GB per core'
 
-    print *
-    print *,'*******************************************************************************'
-    print *,'ALL dumpings on the disk will require storing ', &
-                   disk_size_of_each_dumping*number_of_dumpings_to_do*NPROCTOT,' GB for all cores'
-    print *,'  i.e. ',disk_size_of_each_dumping*number_of_dumpings_to_do*NPROCTOT/1000.d0,' TB'
-    print *,'*******************************************************************************'
-    print *
+      print *
+      print *,'*******************************************************************************'
+      print *,'ALL dumpings on the disk will require storing ', &
+                     disk_size_of_each_dumping*number_of_dumpings_to_do*NPROCTOT,' GB for all cores'
+      print *,'  i.e. ',disk_size_of_each_dumping*number_of_dumpings_to_do*NPROCTOT/1000.d0,' TB'
+      print *,'*******************************************************************************'
+      print *
 
     endif
 
@@ -356,102 +356,102 @@
     - 3.d0*subtract_central_cube_points
   write(IOUT,*) '!'
 
-! display location of chunk if regional run
-  if (NCHUNKS /= 6) then
-
-  write(IOUT,*) '! position of the mesh chunk at the surface:'
-  write(IOUT,*) '! -----------------------------------------'
-  write(IOUT,*) '!'
-  write(IOUT,*) '! angular size in first direction in degrees = ',sngl(ANGULAR_WIDTH_XI_IN_DEGREES)
-  write(IOUT,*) '! angular size in second direction in degrees = ',sngl(ANGULAR_WIDTH_ETA_IN_DEGREES)
-  write(IOUT,*) '!'
-  write(IOUT,*) '! longitude of center in degrees = ',sngl(CENTER_LONGITUDE_IN_DEGREES)
-  write(IOUT,*) '! latitude of center in degrees = ',sngl(CENTER_LATITUDE_IN_DEGREES)
-  write(IOUT,*) '!'
-  write(IOUT,*) '! angle of rotation of the first chunk = ',sngl(GAMMA_ROTATION_AZIMUTH)
-
 ! convert width to radians
   ANGULAR_WIDTH_XI_RAD = ANGULAR_WIDTH_XI_IN_DEGREES * DEGREES_TO_RADIANS
   ANGULAR_WIDTH_ETA_RAD = ANGULAR_WIDTH_ETA_IN_DEGREES * DEGREES_TO_RADIANS
 
+! display location of chunk if regional run
+  if (NCHUNKS /= 6) then
+
+    write(IOUT,*) '! position of the mesh chunk at the surface:'
+    write(IOUT,*) '! -----------------------------------------'
+    write(IOUT,*) '!'
+    write(IOUT,*) '! angular size in first direction in degrees = ',sngl(ANGULAR_WIDTH_XI_IN_DEGREES)
+    write(IOUT,*) '! angular size in second direction in degrees = ',sngl(ANGULAR_WIDTH_ETA_IN_DEGREES)
+    write(IOUT,*) '!'
+    write(IOUT,*) '! longitude of center in degrees = ',sngl(CENTER_LONGITUDE_IN_DEGREES)
+    write(IOUT,*) '! latitude of center in degrees = ',sngl(CENTER_LATITUDE_IN_DEGREES)
+    write(IOUT,*) '!'
+    write(IOUT,*) '! angle of rotation of the first chunk = ',sngl(GAMMA_ROTATION_AZIMUTH)
+
 ! compute rotation matrix from Euler angles
-  call euler_angles(rotation_matrix,CENTER_LONGITUDE_IN_DEGREES,CENTER_LATITUDE_IN_DEGREES,GAMMA_ROTATION_AZIMUTH)
+    call euler_angles(rotation_matrix,CENTER_LONGITUDE_IN_DEGREES,CENTER_LATITUDE_IN_DEGREES,GAMMA_ROTATION_AZIMUTH)
 
 ! loop on the four corners of the chunk to display their coordinates
-  icorner = 0
-  do iy = 0,1
-    do ix = 0,1
-
-    icorner = icorner + 1
-
-    xi= - ANGULAR_WIDTH_XI_RAD/2. + dble(ix)*ANGULAR_WIDTH_XI_RAD
-    eta= - ANGULAR_WIDTH_ETA_RAD/2. + dble(iy)*ANGULAR_WIDTH_ETA_RAD
-
-    x=dtan(xi)
-    y=dtan(eta)
-
-    gamma=ONE/dsqrt(ONE+x*x+y*y)
-    rgt=R_UNIT_SPHERE*gamma
-
-    ! define the mesh points at the top surface
-    x_top=-y*rgt
-    y_top=x*rgt
-    z_top=rgt
-
-    ! rotate top
-    vector_ori(1) = x_top
-    vector_ori(2) = y_top
-    vector_ori(3) = z_top
-    do i=1,3
-      vector_rotated(i)=0.0d0
-      do j=1,3
-        vector_rotated(i)=vector_rotated(i)+rotation_matrix(i,j)*vector_ori(j)
+    icorner = 0
+    do iy = 0,1
+      do ix = 0,1
+
+      icorner = icorner + 1
+
+      xi= - ANGULAR_WIDTH_XI_RAD/2. + dble(ix)*ANGULAR_WIDTH_XI_RAD
+      eta= - ANGULAR_WIDTH_ETA_RAD/2. + dble(iy)*ANGULAR_WIDTH_ETA_RAD
+
+      x=dtan(xi)
+      y=dtan(eta)
+
+      gamma=ONE/dsqrt(ONE+x*x+y*y)
+      rgt=R_UNIT_SPHERE*gamma
+
+      ! define the mesh points at the top surface
+      x_top=-y*rgt
+      y_top=x*rgt
+      z_top=rgt
+
+      ! rotate top
+      vector_ori(1) = x_top
+      vector_ori(2) = y_top
+      vector_ori(3) = z_top
+      do i=1,3
+        vector_rotated(i)=0.0d0
+        do j=1,3
+          vector_rotated(i)=vector_rotated(i)+rotation_matrix(i,j)*vector_ori(j)
+        enddo
       enddo
-    enddo
-    x_top = vector_rotated(1)
-    y_top = vector_rotated(2)
-    z_top = vector_rotated(3)
+      x_top = vector_rotated(1)
+      y_top = vector_rotated(2)
+      z_top = vector_rotated(3)
 
-    ! convert to latitude and longitude
-    call xyz_2_rthetaphi_dble(x_top,y_top,z_top,r_corner,theta_corner,phi_corner)
-    call reduce(theta_corner,phi_corner)
+      ! convert to latitude and longitude
+      call xyz_2_rthetaphi_dble(x_top,y_top,z_top,r_corner,theta_corner,phi_corner)
+      call reduce(theta_corner,phi_corner)
 
-    ! convert geocentric to geographic colatitude
-    call geocentric_2_geographic_dble(theta_corner,colat_corner)
+      ! convert geocentric to geographic colatitude
+      call geocentric_2_geographic_dble(theta_corner,colat_corner)
 
-    if (phi_corner>PI) phi_corner=phi_corner-TWO_PI
+      if (phi_corner>PI) phi_corner=phi_corner-TWO_PI
 
-    ! compute real position of the source
-    lat = (PI_OVER_TWO-colat_corner)*RADIANS_TO_DEGREES
-    long = phi_corner*RADIANS_TO_DEGREES
+      ! compute real position of the source
+      lat = (PI_OVER_TWO-colat_corner)*RADIANS_TO_DEGREES
+      long = phi_corner*RADIANS_TO_DEGREES
 
-    write(IOUT,*) '!'
-    write(IOUT,*) '! corner ',icorner
-    write(IOUT,*) '! longitude in degrees = ',long
-    write(IOUT,*) '! latitude in degrees = ',lat
+      write(IOUT,*) '!'
+      write(IOUT,*) '! corner ',icorner
+      write(IOUT,*) '! longitude in degrees = ',long
+      write(IOUT,*) '! latitude in degrees = ',lat
 
+      enddo
     enddo
-  enddo
 
-  write(IOUT,*) '!'
+    write(IOUT,*) '!'
 
   endif  ! regional chunk
 
   ! mesh averages
-  if (NCHUNKS == 6 ) then
-    ! global mesh, chunks of 90 degrees
-    num_elem_gc = 4 * NEX_XI
-    num_gll_gc = 4*NEX_XI*(NGLLX-1)
-    avg_dist_deg = 360.d0 / dble(4) / dble(NEX_XI*(NGLLX-1))
-    avg_dist_km = TWO_PI / dble(4) * R_EARTH_KM / dble(NEX_XI*(NGLLX-1))
-    avg_element_size = TWO_PI / dble(4) * R_EARTH_KM / dble(NEX_XI)
-  else
+  if (NCHUNKS /= 6 ) then
     ! regional mesh, variable chunk sizes
     num_elem_gc = int( 90.d0 / ANGULAR_WIDTH_XI_IN_DEGREES * 4 * NEX_XI )
     num_gll_gc = int( 90.d0 / ANGULAR_WIDTH_XI_IN_DEGREES * 4 * NEX_XI *(NGLLX-1) )
     avg_dist_deg = max( ANGULAR_WIDTH_XI_RAD/NEX_XI,ANGULAR_WIDTH_ETA_RAD/NEX_ETA ) / dble(NGLLX-1)
     avg_dist_km = max( ANGULAR_WIDTH_XI_RAD/NEX_XI,ANGULAR_WIDTH_ETA_RAD/NEX_ETA ) * R_EARTH_KM / dble(NGLLX-1)
     avg_element_size = max( ANGULAR_WIDTH_XI_RAD/NEX_XI,ANGULAR_WIDTH_ETA_RAD/NEX_ETA ) * R_EARTH_KM
+  else
+    ! global mesh, chunks of 90 degrees
+    num_elem_gc = 4 * NEX_XI
+    num_gll_gc = 4*NEX_XI*(NGLLX-1)
+    avg_dist_deg = 360.d0 / dble(4) / dble(NEX_XI*(NGLLX-1))
+    avg_dist_km = TWO_PI / dble(4) * R_EARTH_KM / dble(NEX_XI*(NGLLX-1))
+    avg_element_size = TWO_PI / dble(4) * R_EARTH_KM / dble(NEX_XI)
   endif
 
   write(IOUT,*) '! resolution of the mesh at the surface:'
diff --git a/src/specfem3D/compute_forces_outer_core_Dev.F90 b/src/specfem3D/compute_forces_outer_core_Dev.F90
index 1edcdbb..c067995 100644
--- a/src/specfem3D/compute_forces_outer_core_Dev.F90
+++ b/src/specfem3D/compute_forces_outer_core_Dev.F90
@@ -119,6 +119,7 @@
 #else
   integer :: i,j,k
 #endif
+  real(kind=CUSTOM_REAL), dimension(NSTAGE) :: MYALPHA_LDDRK,MYBETA_LDDRK
 
 ! ****************************************************
 !   big loop over all spectral elements in the fluid
@@ -137,6 +138,20 @@
     num_elements = nspec_inner
   endif
 
+  ! note: this is an OpenMP work-around for gfortran
+  ! gfortran has problems with parameters inside a DEFAULT(NONE) OpenMP block 
+  ! for a fix see https://gcc.gnu.org/ml/fortran/2014-10/msg00064.html
+  !
+  ! we use local variables to cirumvent it, another possibility would be to use DEFAULT(SHARED)
+  !
+  ! FIRSTPRIVATE declaration is chosen based on suggestion for performance optimization:
+  ! see https://docs.oracle.com/cd/E19059-01/stud.10/819-0501/7_tuning.html
+
+  if (USE_LDDRK) then
+    MYALPHA_LDDRK(:) = ALPHA_LDDRK(:)
+    MYBETA_LDDRK(:) = BETA_LDDRK(:)
+  endif
+
 !$OMP PARALLEL DEFAULT(NONE) &
 !$OMP SHARED( &
 !$OMP num_elements, phase_ispec_inner, iphase, ibool, displfluid, xstore, ystore, zstore, &
@@ -144,7 +159,7 @@
 !$OMP gammax, gammay, gammaz, deltat, two_omega_earth, timeval, A_array_rotation, B_array_rotation, &
 !$OMP minus_rho_g_over_kappa_fluid, wgll_cube, MOVIE_VOLUME, hprimewgll_xxT, hprimewgll_xx, &
 !$OMP wgllwgll_yz_3D, wgllwgll_xz_3D, wgllwgll_xy_3D, accelfluid, USE_LDDRK, A_array_rotation_lddrk, &
-!$OMP istage, B_array_rotation_lddrk, div_displfluid, ALPHA_LDDRK, BETA_LDDRK ) &
+!$OMP istage, B_array_rotation_lddrk, div_displfluid ) &
 !$OMP PRIVATE( &
 !$OMP ispec_p, ispec, iglob, dummyx_loc, radius, theta, phi, &
 !$OMP cos_theta, sin_theta, cos_phi, sin_phi, int_radius, &
@@ -154,7 +169,8 @@
 !$OMP dpotentialdxl, tempx1, tempx3, dpotentialdyl, dpotentialdzl, two_omega_deltat, cos_two_omega_t, &
 !$OMP sin_two_omega_t, source_euler_A, source_euler_B, A_rotation, B_rotation, ux_rotation, uy_rotation, &
 !$OMP dpotentialdx_with_rot, dpotentialdy_with_rot, gxl, gyl, gzl, gravity_term, &
-!$OMP sum_terms, newtempx1, newtempx3, newtempx2 )
+!$OMP sum_terms, newtempx1, newtempx3, newtempx2 ) &
+!$OMP FIRSTPRIVATE( MYALPHA_LDDRK,MYBETA_LDDRK )
 
 !$OMP DO SCHEDULE(GUIDED)
   do ispec_p = 1,num_elements
@@ -422,15 +438,15 @@
         ! use the source saved above
         DO_LOOP_IJK
 
-          A_array_rotation_lddrk(INDEX_IJK,ispec) = ALPHA_LDDRK(istage) * A_array_rotation_lddrk(INDEX_IJK,ispec) &
+          A_array_rotation_lddrk(INDEX_IJK,ispec) = MYALPHA_LDDRK(istage) * A_array_rotation_lddrk(INDEX_IJK,ispec) &
                                                     + source_euler_A(INDEX_IJK)
           A_array_rotation(INDEX_IJK,ispec) = A_array_rotation(INDEX_IJK,ispec) &
-                                              + BETA_LDDRK(istage) * A_array_rotation_lddrk(INDEX_IJK,ispec)
+                                              + MYBETA_LDDRK(istage) * A_array_rotation_lddrk(INDEX_IJK,ispec)
 
-          B_array_rotation_lddrk(INDEX_IJK,ispec) = ALPHA_LDDRK(istage) * B_array_rotation_lddrk(INDEX_IJK,ispec) &
+          B_array_rotation_lddrk(INDEX_IJK,ispec) = MYALPHA_LDDRK(istage) * B_array_rotation_lddrk(INDEX_IJK,ispec) &
                                                     + source_euler_B(INDEX_IJK)
           B_array_rotation(INDEX_IJK,ispec) = B_array_rotation(INDEX_IJK,ispec) &
-                                              + BETA_LDDRK(istage) * B_array_rotation_lddrk(INDEX_IJK,ispec)
+                                              + MYBETA_LDDRK(istage) * B_array_rotation_lddrk(INDEX_IJK,ispec)
 
         ENDDO_LOOP_IJK
 



More information about the CIG-COMMITS mailing list