[cig-commits] r11939 - in seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta: . OUTPUT_FILES
dkomati1 at geodynamics.org
dkomati1 at geodynamics.org
Fri May 9 17:09:31 PDT 2008
Author: dkomati1
Date: 2008-05-09 17:09:31 -0700 (Fri, 09 May 2008)
New Revision: 11939
Modified:
seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/Makefile
seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/OUTPUT_FILES/values_from_mesher.h
seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/assemble_MPI_vector.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/constants.h
seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/create_header_file.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/read_compute_parameters.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/save_header_file.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/specfem3D.f90
Log:
implementation of FEWER_MESSAGES_LARGER_BUFFERS (option to reduce the size of the MPI buffers by increasing the number of messages)
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/Makefile
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/Makefile 2008-05-09 23:11:56 UTC (rev 11938)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/Makefile 2008-05-10 00:09:31 UTC (rev 11939)
@@ -30,17 +30,17 @@
#
# Intel ifort
#
-#FC = ifort
-#MPIFC = /home/r/geophy/dkomati1/bin/mpich2-1.0.7_ifort/bin/mpif90
-#FLAGS_NO_CHECK = -O3 -e95 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -std95 -check nobounds -align sequence -assume byterecl -i-dynamic -fpe3 -no-ftz
+FC = ifort
+MPIFC = /home/r/geophy/dkomati1/bin/mpich2-1.0.7_ifort/bin/mpif90
+FLAGS_NO_CHECK = -O3 -e95 -implicitnone -warn truncated_source -warn argument_checking -warn unused -warn declarations -std95 -check nobounds -align sequence -assume byterecl -i-dynamic -fpe3 -no-ftz
#FLAGS_NO_CHECK = -O3 -e95 -implicitnone -warn truncated_source -warn argument_checking -warn declarations -std95 -check all -align sequence -assume byterecl -warn unused
#
# GNU gfortran
#
-FC = /usr/bin/gfortran
-MPIFC = /home/r/geophy/dkomati1/bin/mpich2-1.0.7/bin/mpif90
-FLAGS_NO_CHECK = -std=gnu -fimplicit-none -frange-check -O3 -Wunused-labels -Waliasing -Wampersand -Wsurprising -Wline-truncation -Wunderflow
+#FC = /usr/bin/gfortran
+#MPIFC = /home/r/geophy/dkomati1/bin/mpich2-1.0.7/bin/mpif90
+#FLAGS_NO_CHECK = -std=gnu -fimplicit-none -frange-check -O3 -Wunused-labels -Waliasing -Wampersand -Wsurprising -Wline-truncation -Wunderflow
#
# Portland pgf90
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/OUTPUT_FILES/values_from_mesher.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/OUTPUT_FILES/values_from_mesher.h 2008-05-09 23:11:56 UTC (rev 11938)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/OUTPUT_FILES/values_from_mesher.h 2008-05-10 00:09:31 UTC (rev 11939)
@@ -1,4 +1,4 @@
-
+
!
! this is the parameter file for static compilation of the solver
!
@@ -43,7 +43,7 @@
!
! number of seismic sources = 1
!
-
+
! approximate static memory needed by the solver:
! ----------------------------------------------
!
@@ -58,36 +58,36 @@
! size of static arrays for all slices = 1151.40130090714 GB
! = 1.12441533291712 TB
!
-
+
integer, parameter :: NEX_XI_VAL = 864
integer, parameter :: NEX_ETA_VAL = 864
-
+
integer, parameter :: NSPEC_CRUST_MANTLE = 57456
integer, parameter :: NSPEC_OUTER_CORE = 7308
integer, parameter :: NSPEC_INNER_CORE = 4212
-
+
integer, parameter :: NGLOB_CRUST_MANTLE = 3776989
integer, parameter :: NGLOB_OUTER_CORE = 493569
integer, parameter :: NGLOB_INNER_CORE = 293125
-
+
integer, parameter :: NSPECMAX_ANISO_IC = 1
-
+
integer, parameter :: NSPECMAX_ISO_MANTLE = 57456
integer, parameter :: NSPECMAX_TISO_MANTLE = 1
integer, parameter :: NSPECMAX_ANISO_MANTLE = 1
-
+
integer, parameter :: NSPEC_CRUST_MANTLE_ATTENUAT = 1
integer, parameter :: NSPEC_INNER_CORE_ATTENUATION = 1
-
+
integer, parameter :: NSPEC_CRUST_MANTLE_STR_OR_ATT = 1
integer, parameter :: NSPEC_INNER_CORE_STR_OR_ATT = 1
-
+
integer, parameter :: NSPEC_CRUST_MANTLE_STR_AND_ATT = 1
integer, parameter :: NSPEC_INNER_CORE_STR_AND_ATT = 1
-
+
integer, parameter :: NSPEC_CRUST_MANTLE_STRAIN_ONLY = 1
integer, parameter :: NSPEC_INNER_CORE_STRAIN_ONLY = 1
-
+
integer, parameter :: NSPEC_CRUST_MANTLE_ADJOINT = 1
integer, parameter :: NSPEC_OUTER_CORE_ADJOINT = 1
integer, parameter :: NSPEC_INNER_CORE_ADJOINT = 1
@@ -95,29 +95,29 @@
integer, parameter :: NGLOB_OUTER_CORE_ADJOINT = 1
integer, parameter :: NGLOB_INNER_CORE_ADJOINT = 1
integer, parameter :: NSPEC_OUTER_CORE_ROT_ADJOINT = 1
-
+
integer, parameter :: NSPEC_CRUST_MANTLE_STACEY = 1
integer, parameter :: NSPEC_OUTER_CORE_STACEY = 1
-
+
integer, parameter :: NGLOB_CRUST_MANTLE_OCEANS = 1
-
+
logical, parameter :: TRANSVERSE_ISOTROPY_VAL = .false.
-
+
logical, parameter :: ANISOTROPIC_3D_MANTLE_VAL = .false.
-
+
logical, parameter :: ANISOTROPIC_INNER_CORE_VAL = .false.
-
+
logical, parameter :: ATTENUATION_VAL = .false.
-
+
logical, parameter :: ATTENUATION_3D_VAL = .false.
-
+
logical, parameter :: ELLIPTICITY_VAL = .false.
-
+
logical, parameter :: GRAVITY_VAL = .false.
-
+
logical, parameter :: ROTATION_VAL = .false.
integer, parameter :: NSPEC_OUTER_CORE_ROTATION = 1
-
+
integer, parameter :: NGLOB1D_RADIAL_CM = 493
integer, parameter :: NGLOB1D_RADIAL_OC = 345
integer, parameter :: NGLOB1D_RADIAL_IC = 37
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/assemble_MPI_vector.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/assemble_MPI_vector.f90 2008-05-09 23:11:56 UTC (rev 11938)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/assemble_MPI_vector.f90 2008-05-10 00:09:31 UTC (rev 11939)
@@ -45,7 +45,7 @@
buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector, &
NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL_crust_mantle, &
- NGLOB1D_RADIAL_inner_core,NCHUNKS)
+ NGLOB1D_RADIAL_inner_core,NCHUNKS,NDIM_smaller_buffers)
implicit none
@@ -71,7 +71,7 @@
integer npoin2D_faces_inner_core(NUMFACES_SHARED)
integer NGLOB1D_RADIAL_crust_mantle,NGLOB1D_RADIAL_inner_core,NPROC_XI,NPROC_ETA
- integer NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS
+ integer NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS,NDIM_smaller_buffers
! for addressing of the slices
integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1) :: addressing
@@ -90,7 +90,7 @@
integer :: npoin2D_max_all
integer, dimension(NGLOB2DMAX_XY_VAL_CM,NUMFACES_SHARED) :: iboolfaces_crust_mantle
integer, dimension(NGLOB2DMAX_XY_VAL_IC,NUMFACES_SHARED) :: iboolfaces_inner_core
- real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all) :: buffer_send_faces_vector,buffer_received_faces_vector
+ real(kind=CUSTOM_REAL), dimension(NDIM_smaller_buffers,npoin2D_max_all) :: buffer_send_faces_vector,buffer_received_faces_vector
! buffers for send and receive between corners of the chunks
! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
@@ -110,7 +110,7 @@
integer ipoin,ipoin2D,ipoin1D
integer sender,receiver,ier
- integer imsg,imsg_loop
+ integer imsg,imsg_loop,iloop
integer icount_faces,npoin2D_chunks_all
integer :: npoin2D_xi_all,npoin2D_eta_all,NGLOB1D_RADIAL_all,ioffset
@@ -130,6 +130,9 @@
!---- assemble the contributions between slices using MPI
!----
+! loop three times if using smaller buffers, and only once if using larger buffers
+ do iloop = 1,NDIM + 1 - NDIM_smaller_buffers
+
!----
!---- first assemble along xi using the 2-D topology
!----
@@ -142,15 +145,19 @@
! slices copy the right face into the buffer
do ipoin = 1,npoin2D_xi_crust_mantle
- buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolright_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin))
+ buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(iloop,iboolright_xi_crust_mantle(ipoin))
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin))
+ buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin))
+ endif
enddo
do ipoin = 1,npoin2D_xi_inner_core
- buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolright_xi_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolright_xi_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolright_xi_inner_core(ipoin))
+ buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(iloop,iboolright_xi_inner_core(ipoin))
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolright_xi_inner_core(ipoin))
+ buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolright_xi_inner_core(ipoin))
+ endif
enddo
! send messages forward along each row
@@ -164,29 +171,33 @@
else
receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
endif
- call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_xi_all,CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_vector,NDIM*npoin2D_xi_all,CUSTOM_MPI_TYPE,sender, &
+ call MPI_SENDRECV(buffer_send_faces_vector,NDIM_smaller_buffers*npoin2D_xi_all,CUSTOM_MPI_TYPE,receiver, &
+ itag2,buffer_received_faces_vector,NDIM_smaller_buffers*npoin2D_xi_all,CUSTOM_MPI_TYPE,sender, &
itag,MPI_COMM_WORLD,msg_status,ier)
! all slices add the buffer received to the contributions on the left face
if(iproc_xi > 0) then
do ipoin = 1,npoin2D_xi_crust_mantle
- accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin)) + &
+ accel_crust_mantle(iloop,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(iloop,iboolleft_xi_crust_mantle(ipoin)) + &
buffer_received_faces_vector(1,ipoin)
- accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin)) + &
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin)) + &
buffer_received_faces_vector(2,ipoin)
- accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin)) + &
+ accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin)) + &
buffer_received_faces_vector(3,ipoin)
+ endif
enddo
do ipoin = 1,npoin2D_xi_inner_core
- accel_inner_core(1,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(1,iboolleft_xi_inner_core(ipoin)) + &
+ accel_inner_core(iloop,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(iloop,iboolleft_xi_inner_core(ipoin)) + &
buffer_received_faces_vector(1,ioffset + ipoin)
- accel_inner_core(2,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin)) + &
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ accel_inner_core(2,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin)) + &
buffer_received_faces_vector(2,ioffset + ipoin)
- accel_inner_core(3,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin)) + &
+ accel_inner_core(3,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin)) + &
buffer_received_faces_vector(3,ioffset + ipoin)
+ endif
enddo
endif
@@ -195,15 +206,19 @@
! now we have to send the result back to the sender
! all slices copy the left face into the buffer
do ipoin = 1,npoin2D_xi_crust_mantle
- buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin))
+ buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(iloop,iboolleft_xi_crust_mantle(ipoin))
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin))
+ buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin))
+ endif
enddo
do ipoin = 1,npoin2D_xi_inner_core
- buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolleft_xi_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin))
+ buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(iloop,iboolleft_xi_inner_core(ipoin))
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin))
+ buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin))
+ endif
enddo
! send messages backward along each row
@@ -217,23 +232,27 @@
else
receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
endif
- call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_xi_all,CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_vector,NDIM*npoin2D_xi_all,CUSTOM_MPI_TYPE,sender, &
+ call MPI_SENDRECV(buffer_send_faces_vector,NDIM_smaller_buffers*npoin2D_xi_all,CUSTOM_MPI_TYPE,receiver, &
+ itag2,buffer_received_faces_vector,NDIM_smaller_buffers*npoin2D_xi_all,CUSTOM_MPI_TYPE,sender, &
itag,MPI_COMM_WORLD,msg_status,ier)
! all slices copy the buffer received to the contributions on the right face
if(iproc_xi < NPROC_XI-1) then
do ipoin = 1,npoin2D_xi_crust_mantle
- accel_crust_mantle(1,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin)
- accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin)
- accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin)
+ accel_crust_mantle(iloop,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin)
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin)
+ accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin)
+ endif
enddo
do ipoin = 1,npoin2D_xi_inner_core
- accel_inner_core(1,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin)
- accel_inner_core(2,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin)
- accel_inner_core(3,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin)
+ accel_inner_core(iloop,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin)
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ accel_inner_core(2,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin)
+ accel_inner_core(3,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin)
+ endif
enddo
endif
@@ -252,15 +271,19 @@
! slices copy the right face into the buffer
do ipoin = 1,npoin2D_eta_crust_mantle
- buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolright_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin))
+ buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(iloop,iboolright_eta_crust_mantle(ipoin))
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin))
+ buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin))
+ endif
enddo
do ipoin = 1,npoin2D_eta_inner_core
- buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolright_eta_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolright_eta_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolright_eta_inner_core(ipoin))
+ buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(iloop,iboolright_eta_inner_core(ipoin))
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolright_eta_inner_core(ipoin))
+ buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolright_eta_inner_core(ipoin))
+ endif
enddo
! send messages forward along each row
@@ -274,29 +297,33 @@
else
receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
endif
- call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_eta_all,CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_vector,NDIM*npoin2D_eta_all,CUSTOM_MPI_TYPE,sender, &
+ call MPI_SENDRECV(buffer_send_faces_vector,NDIM_smaller_buffers*npoin2D_eta_all,CUSTOM_MPI_TYPE,receiver, &
+ itag2,buffer_received_faces_vector,NDIM_smaller_buffers*npoin2D_eta_all,CUSTOM_MPI_TYPE,sender, &
itag,MPI_COMM_WORLD,msg_status,ier)
! all slices add the buffer received to the contributions on the left face
if(iproc_eta > 0) then
do ipoin = 1,npoin2D_eta_crust_mantle
- accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin)) + &
+ accel_crust_mantle(iloop,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(iloop,iboolleft_eta_crust_mantle(ipoin)) + &
buffer_received_faces_vector(1,ipoin)
- accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin)) + &
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin)) + &
buffer_received_faces_vector(2,ipoin)
- accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin)) + &
+ accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin)) + &
buffer_received_faces_vector(3,ipoin)
+ endif
enddo
do ipoin = 1,npoin2D_eta_inner_core
- accel_inner_core(1,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(1,iboolleft_eta_inner_core(ipoin)) + &
+ accel_inner_core(iloop,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(iloop,iboolleft_eta_inner_core(ipoin)) + &
buffer_received_faces_vector(1,ioffset + ipoin)
- accel_inner_core(2,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin)) + &
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ accel_inner_core(2,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin)) + &
buffer_received_faces_vector(2,ioffset + ipoin)
- accel_inner_core(3,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin)) + &
+ accel_inner_core(3,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin)) + &
buffer_received_faces_vector(3,ioffset + ipoin)
+ endif
enddo
endif
@@ -305,15 +332,19 @@
! now we have to send the result back to the sender
! all slices copy the left face into the buffer
do ipoin = 1,npoin2D_eta_crust_mantle
- buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin))
+ buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(iloop,iboolleft_eta_crust_mantle(ipoin))
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin))
+ buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin))
+ endif
enddo
do ipoin = 1,npoin2D_eta_inner_core
- buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolleft_eta_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin))
+ buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(iloop,iboolleft_eta_inner_core(ipoin))
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin))
+ buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin))
+ endif
enddo
! send messages backward along each row
@@ -327,29 +358,37 @@
else
receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
endif
- call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_eta_all,CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_vector,NDIM*npoin2D_eta_all,CUSTOM_MPI_TYPE,sender, &
+ call MPI_SENDRECV(buffer_send_faces_vector,NDIM_smaller_buffers*npoin2D_eta_all,CUSTOM_MPI_TYPE,receiver, &
+ itag2,buffer_received_faces_vector,NDIM_smaller_buffers*npoin2D_eta_all,CUSTOM_MPI_TYPE,sender, &
itag,MPI_COMM_WORLD,msg_status,ier)
! all slices copy the buffer received to the contributions on the right face
if(iproc_eta < NPROC_ETA-1) then
do ipoin = 1,npoin2D_eta_crust_mantle
- accel_crust_mantle(1,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin)
- accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin)
- accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin)
+ accel_crust_mantle(iloop,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin)
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin)
+ accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin)
+ endif
enddo
do ipoin = 1,npoin2D_eta_inner_core
- accel_inner_core(1,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin)
- accel_inner_core(2,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin)
- accel_inner_core(3,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin)
+ accel_inner_core(iloop,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin)
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ accel_inner_core(2,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin)
+ accel_inner_core(3,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin)
+ endif
enddo
endif
endif
+ enddo ! of loop on iloop depending on NDIM_smaller_buffers
+
+! 33333333333333333333 YYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYYY
+
!----
!---- start MPI assembling phase between chunks
!----
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/constants.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/constants.h 2008-05-09 23:11:56 UTC (rev 11938)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/constants.h 2008-05-10 00:09:31 UTC (rev 11939)
@@ -31,6 +31,10 @@
!--- user can modify parameters below
!
+! decrease the number of MPI messages by a factor of 3, but increase the size
+! of several MPI buffers by a factor of 3 in order to do that
+ logical, parameter :: FEWER_MESSAGES_LARGER_BUFFERS = .true.
+
!
! solver in single or double precision depending on the machine (4 or 8 bytes)
!
@@ -46,15 +50,6 @@
! set to SIZE_DOUBLE to run in double precision (increases memory size by 2)
integer, parameter :: CUSTOM_REAL = SIZE_REAL
-! if files on a local path on each node are also seen as global with same path
-! set to .true. typically on a shared-memory machine with a common file system
-! set to .false. typically on a cluster of nodes with local disks
-! if running on a cluster of nodes with local disks, also customize global path
-! to local files in create_serial_name_database.f90 ("20 format ...")
-! Flag is used only when one checks the mesh with the serial codes
-! ("xcheck_buffers_1D" etc.), ignore it if you do not plan to use them
- logical, parameter :: LOCAL_PATH_IS_ALSO_GLOBAL = .false.
-
! input, output and main MPI I/O files
integer, parameter :: ISTANDARD_OUTPUT = 6
integer, parameter :: IIN = 40,IOUT = 41,IOUT_SAC = 903
@@ -366,9 +361,6 @@
! in which case it is 4
integer, parameter :: NUMCORNERS_SHARED = 1 !!!!!! DK DK removed support for one slice only 4
-! number of slaves per corner
- integer, parameter :: NUMSLAVES = 2
-
! number of layers in PREM
integer, parameter :: NR = 640
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/create_header_file.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/create_header_file.f90 2008-05-09 23:11:56 UTC (rev 11938)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/create_header_file.f90 2008-05-10 00:09:31 UTC (rev 11939)
@@ -214,8 +214,8 @@
print *,'number of time steps = ',NSTEP
print *
- print *,'on NEC SX and Earth Simulator, make sure "loopcnt=" parameter'
-! use fused loops on the ES
+ print *,'on NEC SX, make sure "loopcnt=" parameter'
+! use fused loops on NEC SX
print *,'in Makefile is greater than max vector length = ',nglob(IREGION_CRUST_MANTLE)*NDIM
print *
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/read_compute_parameters.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/read_compute_parameters.f90 2008-05-09 23:11:56 UTC (rev 11938)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/read_compute_parameters.f90 2008-05-10 00:09:31 UTC (rev 11939)
@@ -1164,6 +1164,11 @@
if(NCHUNKS > 2 .and. NEX_XI /= NEX_ETA) stop 'must have NEX_XI = NEX_ETA for more than two chunks'
if(NCHUNKS > 2 .and. NPROC_XI /= NPROC_ETA) stop 'must have NPROC_XI = NPROC_ETA for more than two chunks'
+! check that option to run one slice only per chunk has been activated
+! (it is deactivated by default because MPI buffers use more memory when it is on)
+ if((NPROC_XI == 1 .or. NPROC_ETA == 1) .and. (NUMFACES_SHARED /= 4 .or. NUMCORNERS_SHARED /= 4)) &
+ stop 'option to run one slice only per chunk is deactivated, edit constants.h and recompile'
+
! check that IASP91, AK135, 1066A, JP1D or SEA1D is isotropic
if((REFERENCE_1D_MODEL == REFERENCE_MODEL_IASP91 .or. &
REFERENCE_1D_MODEL == REFERENCE_MODEL_AK135 .or. &
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/save_header_file.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/save_header_file.f90 2008-05-09 23:11:56 UTC (rev 11938)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/save_header_file.f90 2008-05-10 00:09:31 UTC (rev 11939)
@@ -131,8 +131,8 @@
write(IOUT,*) '!'
write(IOUT,*) '! maximum number of points per region = ',nglob(IREGION_CRUST_MANTLE)
write(IOUT,*) '!'
-! use fused loops on the ES
- write(IOUT,*) '! on NEC SX and Earth Simulator, make sure "loopcnt=" parameter'
+! use fused loops on NEC SX
+ write(IOUT,*) '! on NEC SX, make sure "loopcnt=" parameter'
write(IOUT,*) '! in Makefile is greater than max vector length = ',nglob(IREGION_CRUST_MANTLE)*NDIM
write(IOUT,*) '!'
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/specfem3D.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/specfem3D.f90 2008-05-09 23:11:56 UTC (rev 11938)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/version41_beta/specfem3D.f90 2008-05-10 00:09:31 UTC (rev 11939)
@@ -514,7 +514,7 @@
integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_inner_core,npoin2D_eta_inner_core
!! DK DK added this to reduce the size of the buffers
- integer :: npoin2D_max_all
+ integer :: npoin2D_max_all,NDIM_smaller_buffers
integer ichunk,iproc_xi,iproc_eta !!!!!!!!!!!!!!!!!!!!!!,iproc,iproc_read
integer NPROC_ONE_DIRECTION
@@ -634,8 +634,13 @@
! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
npoin2D_max_all = max(maxval(npoin2D_xi_crust_mantle(:) + npoin2D_xi_inner_core(:)), &
maxval(npoin2D_eta_crust_mantle(:) + npoin2D_eta_inner_core(:)))
- allocate(buffer_send_faces(NDIM,npoin2D_max_all))
- allocate(buffer_received_faces(NDIM,npoin2D_max_all))
+ if(FEWER_MESSAGES_LARGER_BUFFERS) then
+ NDIM_smaller_buffers = NDIM
+ else
+ NDIM_smaller_buffers = 1
+ endif
+ allocate(buffer_send_faces(NDIM_smaller_buffers,npoin2D_max_all))
+ allocate(buffer_received_faces(NDIM_smaller_buffers,npoin2D_max_all))
if (myrank == 0) then
@@ -2291,7 +2296,7 @@
buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector, &
NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
- NGLOB1D_RADIAL(IREGION_INNER_CORE),NCHUNKS)
+ NGLOB1D_RADIAL(IREGION_INNER_CORE),NCHUNKS,NDIM_smaller_buffers)
!---
!--- use buffers to assemble forces with the central cube
More information about the cig-commits
mailing list