[cig-commits] r17932 - seismo/3D/SPECFEM3D_GLOBE/trunk
dkomati1 at geodynamics.org
Mon Feb 21 17:39:53 PST 2011
Author: dkomati1
Date: 2011-02-21 17:39:52 -0800 (Mon, 21 Feb 2011)
New Revision: 17932
Modified:
seismo/3D/SPECFEM3D_GLOBE/trunk/assemble_MPI_central_cube.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/assemble_MPI_vector.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_crust_mantle.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_crust_mantle_Dev.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_inner_core.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_inner_core_Dev.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_outer_core_Dev.f90
seismo/3D/SPECFEM3D_GLOBE/trunk/specfem3D.f90
Log:
added non-blocking MPI for crust_mantle, inner_core and the central cube for the forward problem;
i.e., non-blocking MPI is now implemented for the whole forward problem.
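For readers of this commit: the new routines are written as re-entrant state machines. Each call posts non-blocking communications (MPI_IRECV / MPI_ISSEND), polls them with MPI_TEST, and returns immediately if they have not completed; a saved phase counter (iphase for the faces and corners, iphase_CC for the central cube) is advanced only when a step finishes, so the caller can do useful computation between calls. Below is a minimal, hypothetical sketch of that pattern; the names assemble_example, iphase_example, buffer_send and buffer_recv are illustrative only and do not appear in the sources.

  subroutine assemble_example(buffer_send,buffer_recv,n,dest,source,iphase_example)

  implicit none

! standard include of the MPI library
  include 'mpif.h'

  integer :: n,dest,source,iphase_example,ier
  double precision, dimension(n) :: buffer_send,buffer_recv
  integer, dimension(MPI_STATUS_SIZE) :: msg_status
  logical :: flag_result_test

! "save" is needed because the routine returns and is re-entered later
  integer, save :: request_send,request_receive

  if(iphase_example == 1) then
! start the communications, then return so that the caller can compute in the meantime
    call MPI_IRECV(buffer_recv,n,MPI_DOUBLE_PRECISION,source,0,MPI_COMM_WORLD,request_receive,ier)
    call MPI_ISSEND(buffer_send,n,MPI_DOUBLE_PRECISION,dest,0,MPI_COMM_WORLD,request_send,ier)
    iphase_example = iphase_example + 1
    return
  endif

  if(iphase_example == 2) then
! poll for completion; if not done yet, return and let the caller try again later
    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
    if(.not. flag_result_test) return
    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
    if(.not. flag_result_test) return
! ... use buffer_recv to assemble the contributions here ...
! go beyond the last phase number: this signals that everything is finished
    iphase_example = iphase_example + 1
  endif

  end subroutine assemble_example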
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/assemble_MPI_central_cube.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/assemble_MPI_central_cube.f90 2011-02-22 01:39:25 UTC (rev 17931)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/assemble_MPI_central_cube.f90 2011-02-22 01:39:52 UTC (rev 17932)
@@ -25,29 +25,30 @@
!
!=====================================================================
-subroutine assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
- npoin2D_cube_from_slices, buffer_all_cube_from_slices, buffer_slices, buffer_slices2, ibool_central_cube, &
- receiver_cube_from_slices, ibool_inner_core, idoubling_inner_core, NSPEC_INNER_CORE, &
- ibelm_bottom_inner_core, NSPEC2D_BOTTOM_INNER_CORE,NGLOB_INNER_CORE,vector_assemble,ndim_assemble)
+ subroutine assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
+ ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,vector_assemble,ndim_assemble,iphase_CC)
-! this version of the routine is based on non blocking MPI calls
-
implicit none
! standard include of the MPI library
include 'mpif.h'
include 'constants.h'
+! include values created by the mesher
+ include "OUTPUT_FILES/values_from_mesher.h"
+
! for matching with central cube in inner core
- integer ichunk, nb_msgs_theor_in_cube, npoin2D_cube_from_slices
+ integer ichunk, nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
- double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices,buffer_slices2
- double precision, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM) :: buffer_all_cube_from_slices
+ double precision, dimension(npoin2D_cube_from_slices,ndim_assemble) :: buffer_slices
+ double precision, dimension(npoin2D_cube_from_slices,ndim_assemble,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
integer receiver_cube_from_slices
! local to global mapping
- integer NSPEC_INNER_CORE,NSPEC2D_BOTTOM_INNER_CORE, NGLOB_INNER_CORE
+ integer NSPEC2D_BOTTOM_INNER_CORE
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
@@ -63,62 +64,76 @@
real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: array_central_cube
! MPI status of messages to be received
- integer msg_status(MPI_STATUS_SIZE), ier
+ integer, save :: request_send,request_receive
+! maximum value of nb_msgs_theor_in_cube is 5 (when NPROC_XI == 1)
+! therefore NPROC_XI+4 is always large enough
+ integer, dimension(NPROC_XI_VAL+4), save :: request_send_array,request_receive_array
+ logical :: flag_result_test
+ integer, dimension(MPI_STATUS_SIZE) :: msg_status
+ integer :: ier
! mask
logical, dimension(NGLOB_INNER_CORE) :: mask
!---
-!--- now use buffers to assemble mass matrix with central cube once and for all
+!--- use buffers to assemble mass matrix with central cube once and for all
!---
+ if(iphase_CC == 1) then
+
! on chunks AB and AB_ANTIPODE, receive all the messages from slices
if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-
do imsg = 1,nb_msgs_theor_in_cube-1
-
! receive buffers from slices
- sender = sender_from_slices_to_cube(imsg)
- call MPI_RECV(buffer_slices, &
+ sender = sender_from_slices_to_cube(imsg)
+ call MPI_IRECV(buffer_all_cube_from_slices(:,:,imsg), &
ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
-
-! copy buffer in 2D array for each slice
- buffer_all_cube_from_slices(imsg,:,1:ndim_assemble) = buffer_slices(:,1:ndim_assemble)
-
+ itag,MPI_COMM_WORLD,request_receive_array(imsg),ier)
enddo
endif
! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-
! for bottom elements in contact with central cube from the slices side
ipoin = 0
do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
-
ispec = ibelm_bottom_inner_core(ispec2D)
-
! only for DOFs exactly on surface of central cube (bottom of these elements)
k = 1
do j = 1,NGLLY
do i = 1,NGLLX
ipoin = ipoin + 1
- buffer_slices(ipoin,1:ndim_assemble) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
+ buffer_slices(ipoin,:) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
enddo
enddo
enddo
-
! send buffer to central cube
receiver = receiver_cube_from_slices
- call MPI_SEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
- MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
-
+ call MPI_ISSEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
+ MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,request_send,ier)
endif ! end sending info to central cube
+ iphase_CC = iphase_CC + 1
+ return ! exit because we have started some communications therefore we need some time
+ endif !!!!!!!!! end of iphase_CC 1
+
+ if(iphase_CC == 2) then
+
+ if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ endif
+
+ if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+ do imsg = 1,nb_msgs_theor_in_cube-1
+ call MPI_TEST(request_receive_array(imsg),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+ enddo
+ endif
+
! exchange of their bottom faces between chunks AB and AB_ANTIPODE
if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-
ipoin = 0
do ispec = NSPEC_INNER_CORE, 1, -1
if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
@@ -126,26 +141,41 @@
do j = 1,NGLLY
do i = 1,NGLLX
ipoin = ipoin + 1
- buffer_slices(ipoin,1:ndim_assemble) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
+ buffer_slices(ipoin,:) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
enddo
enddo
endif
enddo
-
sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
+! call MPI_SENDRECV(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
+! itag,buffer_slices2,ndim_assemble*npoin2D_cube_from_slices,&
+! MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,msg_status,ier)
- call MPI_SENDRECV(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
- itag,buffer_slices2,ndim_assemble*npoin2D_cube_from_slices,&
- MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+ call MPI_IRECV(buffer_all_cube_from_slices(:,:,nb_msgs_theor_in_cube), &
+ ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,request_receive,ier)
+!! DK DK this merged with previous statement
+! buffer_all_cube_from_slices(:,:,nb_msgs_theor_in_cube) = buffer_slices2(:,:)
- buffer_all_cube_from_slices(nb_msgs_theor_in_cube,:,1:ndim_assemble) = buffer_slices2(:,1:ndim_assemble)
-
+ call MPI_ISSEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
+ itag,MPI_COMM_WORLD,request_send,ier)
endif
+ iphase_CC = iphase_CC + 1
+ return ! exit because we have started some communications therefore we need some time
+
+ endif !!!!!!!!! end of iphase_CC 2
+
+ if(iphase_CC == 3) then
+
!--- now we need to assemble the contributions
if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+
do idimension = 1,ndim_assemble
! erase contributions to central cube array
array_central_cube(:) = 0._CUSTOM_REAL
@@ -155,9 +185,9 @@
do imsg = 1,nb_msgs_theor_in_cube-1
do ipoin = 1,npoin2D_cube_from_slices
if(CUSTOM_REAL == SIZE_REAL) then
- array_central_cube(ibool_central_cube(imsg,ipoin)) = sngl(buffer_all_cube_from_slices(imsg,ipoin,idimension))
+ array_central_cube(ibool_central_cube(imsg,ipoin)) = sngl(buffer_all_cube_from_slices(ipoin,idimension,imsg))
else
- array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(imsg,ipoin,idimension)
+ array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(ipoin,idimension,imsg)
endif
enddo
enddo
@@ -169,11 +199,11 @@
if(CUSTOM_REAL == SIZE_REAL) then
array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
- sngl(buffer_all_cube_from_slices(nb_msgs_theor_in_cube,ipoin,idimension))
+ sngl(buffer_all_cube_from_slices(ipoin,idimension,nb_msgs_theor_in_cube))
else
array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
- buffer_all_cube_from_slices(nb_msgs_theor_in_cube,ipoin,idimension)
+ buffer_all_cube_from_slices(ipoin,idimension,nb_msgs_theor_in_cube)
endif
mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = .true.
endif
@@ -197,7 +227,7 @@
! copy sum back
do imsg = 1,nb_msgs_theor_in_cube-1
do ipoin = 1,npoin2D_cube_from_slices
- buffer_all_cube_from_slices(imsg,ipoin,idimension) = vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
+ buffer_all_cube_from_slices(ipoin,idimension,imsg) = vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
enddo
enddo
@@ -209,55 +239,88 @@
! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-
! receive buffers from slices
sender = receiver_cube_from_slices
- call MPI_RECV(buffer_slices, &
+ call MPI_IRECV(buffer_slices, &
ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+ itag,MPI_COMM_WORLD,request_receive,ier)
+! for bottom elements in contact with central cube from the slices side
+! ipoin = 0
+! do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+! ispec = ibelm_bottom_inner_core(ispec2D)
+! only for DOFs exactly on surface of central cube (bottom of these elements)
+! k = 1
+! do j = 1,NGLLY
+! do i = 1,NGLLX
+! ipoin = ipoin + 1
+! distinguish between single and double precision for reals
+! if(CUSTOM_REAL == SIZE_REAL) then
+! vector_assemble(:,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,:))
+! else
+! vector_assemble(:,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,:)
+! endif
+! enddo
+! enddo
+! enddo
+ endif ! end receiving info from central cube
+!------- send info back from central cube to slices
+
+! on chunk AB & CHUNK_AB_ANTIPODE, send all the messages to slices
+ if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+ do imsg = 1,nb_msgs_theor_in_cube-1
+! send buffers to slices
+ receiver = sender_from_slices_to_cube(imsg)
+ call MPI_ISSEND(buffer_all_cube_from_slices(:,:,imsg),ndim_assemble*npoin2D_cube_from_slices, &
+ MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,request_send_array(imsg),ier)
+ enddo
+ endif
+
+ iphase_CC = iphase_CC + 1
+ return ! exit because we have started some communications therefore we need some time
+
+ endif !!!!!!!!! end of iphase_CC 3
+
+ if(iphase_CC == 4) then
+
+ if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+ do imsg = 1,nb_msgs_theor_in_cube-1
+ call MPI_TEST(request_send_array(imsg),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ enddo
+ endif
+
+ if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+ endif
+
+! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+ if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
! for bottom elements in contact with central cube from the slices side
ipoin = 0
do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
-
ispec = ibelm_bottom_inner_core(ispec2D)
-
! only for DOFs exactly on surface of central cube (bottom of these elements)
k = 1
do j = 1,NGLLY
do i = 1,NGLLX
ipoin = ipoin + 1
-
! distinguish between single and double precision for reals
if(CUSTOM_REAL == SIZE_REAL) then
- vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,1:ndim_assemble))
+ vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,:))
else
- vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,1:ndim_assemble)
+ vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,:)
endif
-
enddo
enddo
enddo
-
endif ! end receiving info from central cube
-!------- send info back from central cube to slices
+! this is the exit condition, to go beyond the last phase number
+ iphase_CC = iphase_CC + 1
-! on chunk AB & CHUNK_AB_ANTIPODE, send all the messages to slices
- if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+ endif !!!!!!!!! end of iphase_CC 4
- do imsg = 1,nb_msgs_theor_in_cube-1
+ end subroutine assemble_MPI_central_cube
-! copy buffer in 2D array for each slice
- buffer_slices(:,1:ndim_assemble) = buffer_all_cube_from_slices(imsg,:,1:ndim_assemble)
-
-! send buffers to slices
- receiver = sender_from_slices_to_cube(imsg)
- call MPI_SEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
- MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
-
- enddo
- endif
-
-end subroutine assemble_MPI_central_cube
-
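Note on usage: the phased routine above is meant to be driven from the time loop in specfem3D.f90, which keeps calling it, interleaved with element computations, until iphase_CC goes past the last phase (4). The loop below is only an illustrative sketch of that calling convention, not code copied from specfem3D.f90; the actual arguments are whatever the caller holds for the dummy names shown.

! illustrative driver (hypothetical): advance the phased assembly, overlapping it with computation
  iphase_CC = 1
  do while (iphase_CC <= 4)
    call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
            npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
            receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
            ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,vector_assemble,ndim_assemble,iphase_CC)
! ... compute forces for elements that are not on a slice edge while the messages travel ...
  enddo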
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/assemble_MPI_vector.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/assemble_MPI_vector.f90 2011-02-22 01:39:25 UTC (rev 17931)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/assemble_MPI_vector.f90 2011-02-22 01:39:52 UTC (rev 17932)
@@ -31,9 +31,7 @@
!---- to reduce the total number of MPI calls
!----
- subroutine assemble_MPI_vector(myrank, &
- accel_crust_mantle,NGLOB_CRUST_MANTLE, &
- accel_inner_core,NGLOB_INNER_CORE, &
+ subroutine assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
iproc_xi,iproc_eta,ichunk,addressing, &
iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
@@ -41,20 +39,14 @@
iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
iboolfaces_inner_core,iboolcorner_inner_core, &
- iprocfrom_faces,iprocto_faces,imsg_type, &
+ iprocfrom_faces,iprocto_faces, &
iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
- buffer_send_faces_vector,buffer_received_faces_vector, &
+ buffer_send_faces_vector,buffer_received_faces_vector,npoin2D_max_all_CM_IC, &
buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector, &
- NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
- NPROC_XI,NPROC_ETA, &
- NGLOB1D_RADIAL_crust_mantle, &
- NGLOB2DMAX_XMIN_XMAX_CM,NGLOB2DMAX_YMIN_YMAX_CM, &
- NGLOB1D_RADIAL_inner_core, &
- NGLOB2DMAX_XMIN_XMAX_IC,NGLOB2DMAX_YMIN_YMAX_IC, &
- NGLOB2DMAX_XY,NCHUNKS)
+ NUMMSGS_FACES,NCORNERSCHUNKS, &
+ NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL_crust_mantle, &
+ NGLOB1D_RADIAL_inner_core,NCHUNKS,iphase)
-! this version of the routine is based on non blocking MPI calls
-
implicit none
! standard include of the MPI library
@@ -63,23 +55,25 @@
include "constants.h"
include "precision.h"
- integer myrank,NGLOB_CRUST_MANTLE,NGLOB_INNER_CORE,NCHUNKS
+! include values created by the mesher
+ include "OUTPUT_FILES/values_from_mesher.h"
+ integer myrank,NCHUNKS,iphase
+
! the two arrays to assemble
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: accel_crust_mantle
real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: accel_inner_core
integer iproc_xi,iproc_eta,ichunk
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle
integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
- integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_inner_core,npoin2D_eta_inner_core
integer npoin2D_faces_inner_core(NUMFACES_SHARED)
- integer NGLOB2DMAX_XMIN_XMAX_CM,NGLOB2DMAX_YMIN_YMAX_CM,NGLOB1D_RADIAL_crust_mantle
- integer NGLOB2DMAX_XMIN_XMAX_IC,NGLOB2DMAX_YMIN_YMAX_IC,NGLOB1D_RADIAL_inner_core
- integer NPROC_XI,NPROC_ETA,NGLOB2DMAX_XY
- integer NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS
+ integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ npoin2D_xi_inner_core,npoin2D_eta_inner_core
+ integer NGLOB1D_RADIAL_crust_mantle,NGLOB1D_RADIAL_inner_core,NPROC_XI,NPROC_ETA
+ integer NUMMSGS_FACES,NCORNERSCHUNKS
+
! for addressing of the slices
integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1) :: addressing
@@ -94,9 +88,11 @@
integer, dimension(NGLOB1D_RADIAL_inner_core,NUMCORNERS_SHARED) :: iboolcorner_inner_core
integer icount_corners
- integer, dimension(NGLOB2DMAX_XY,NUMFACES_SHARED) :: iboolfaces_crust_mantle,iboolfaces_inner_core
-! size of buffers is multiplied by 2 because we handle two regions in the same MPI call
- real(kind=CUSTOM_REAL), dimension(NDIM,2*NGLOB2DMAX_XY) :: buffer_send_faces_vector,buffer_received_faces_vector
+ integer :: npoin2D_max_all_CM_IC
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
+ real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED) :: &
+ buffer_send_faces_vector,buffer_received_faces_vector
! buffers for send and receive between corners of the chunks
! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
@@ -106,25 +102,40 @@
! ---- arrays to assemble between chunks
! communication pattern for faces between chunks
- integer, dimension(NUMMSGS_FACES) :: iprocfrom_faces,iprocto_faces,imsg_type
+ integer, dimension(NUMMSGS_FACES) :: iprocfrom_faces,iprocto_faces
! communication pattern for corners between chunks
integer, dimension(NCORNERSCHUNKS) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
! MPI status of messages to be received
- integer msg_status(MPI_STATUS_SIZE)
+ integer, dimension(MPI_STATUS_SIZE) :: msg_status
- integer ipoin,ipoin2D,ipoin1D
- integer sender,receiver,ier
- integer imsg,imsg_loop
- integer icount_faces,npoin2D_chunks_all
+ integer :: ipoin,ipoin2D,ipoin1D
+ integer :: sender,receiver
+ integer :: imsg
+ integer :: icount_faces,npoin2D_chunks_all
- integer :: NGLOB1D_RADIAL_all,ioffset
+ integer :: NGLOB1D_RADIAL_all
integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_all,npoin2D_eta_all
+
+! do not remove the "save" statement because this routine is non blocking
+! therefore it needs to find the right value of ioffset when it re-enters
+! the routine later to perform the next communication step
+ integer, save :: ioffset
+
+ integer :: ier
+! do not remove the "save" statement because this routine is non blocking
+ integer, save :: request_send,request_receive
+ integer, dimension(NUMFACES_SHARED), save :: request_send_array,request_receive_array
+ logical :: flag_result_test
+
! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
! check flag to see if we need to assemble (might be turned off when debugging)
- if (.not. ACTUALLY_ASSEMBLE_MPI_SLICES) return
+ if (.not. ACTUALLY_ASSEMBLE_MPI_SLICES) then
+ iphase = 9999 ! this means everything is finished
+ return
+ endif
! here we have to assemble all the contributions between slices using MPI
@@ -140,23 +151,22 @@
!---- first assemble along xi using the 2-D topology
!----
-! assemble along xi only if more than one slice
- if(NPROC_XI > 1) then
+ if(iphase == 1) then
! slices copy the right face into the buffer
+ do ipoin = 1,npoin2D_xi_crust_mantle(2)
+ buffer_send_faces_vector(1,ipoin,1) = accel_crust_mantle(1,iboolright_xi_crust_mantle(ipoin))
+ buffer_send_faces_vector(2,ipoin,1) = accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin))
+ buffer_send_faces_vector(3,ipoin,1) = accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin))
+ enddo
+
! the buffer for the inner core starts right after the buffer for the crust and mantle
ioffset = npoin2D_xi_crust_mantle(2)
- do ipoin = 1,npoin2D_xi_crust_mantle(2)
- buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolright_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin))
- enddo
-
do ipoin = 1,npoin2D_xi_inner_core(2)
- buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolright_xi_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolright_xi_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolright_xi_inner_core(ipoin))
+ buffer_send_faces_vector(1,ioffset + ipoin,1) = accel_inner_core(1,iboolright_xi_inner_core(ipoin))
+ buffer_send_faces_vector(2,ioffset + ipoin,1) = accel_inner_core(2,iboolright_xi_inner_core(ipoin))
+ buffer_send_faces_vector(3,ioffset + ipoin,1) = accel_inner_core(3,iboolright_xi_inner_core(ipoin))
enddo
! send messages forward along each row
@@ -170,31 +180,48 @@
else
receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
endif
+ call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
- call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+ call MPI_ISSEND(buffer_send_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
+ iphase = iphase + 1
+ return ! exit because we have started some communications therefore we need some time
+
+ endif !!!!!!!!! end of iphase 1
+
+ if(iphase == 2) then
+
+! call MPI_WAIT(request_send,msg_status,ier)
+! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+
! all slices add the buffer received to the contributions on the left face
if(iproc_xi > 0) then
do ipoin = 1,npoin2D_xi_crust_mantle(1)
accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(1,ipoin)
+ buffer_received_faces_vector(1,ipoin,1)
accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(2,ipoin)
+ buffer_received_faces_vector(2,ipoin,1)
accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(3,ipoin)
+ buffer_received_faces_vector(3,ipoin,1)
enddo
+! the buffer for the inner core starts right after the buffer for the crust and mantle
ioffset = npoin2D_xi_crust_mantle(1)
+
do ipoin = 1,npoin2D_xi_inner_core(1)
accel_inner_core(1,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(1,iboolleft_xi_inner_core(ipoin)) + &
- buffer_received_faces_vector(1,ioffset + ipoin)
+ buffer_received_faces_vector(1,ioffset + ipoin,1)
accel_inner_core(2,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin)) + &
- buffer_received_faces_vector(2,ioffset + ipoin)
+ buffer_received_faces_vector(2,ioffset + ipoin,1)
accel_inner_core(3,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin)) + &
- buffer_received_faces_vector(3,ioffset + ipoin)
+ buffer_received_faces_vector(3,ioffset + ipoin,1)
enddo
endif
@@ -202,19 +229,19 @@
! the contributions are correctly assembled on the left side of each slice
! now we have to send the result back to the sender
! all slices copy the left face into the buffer
+ do ipoin = 1,npoin2D_xi_crust_mantle(1)
+ buffer_send_faces_vector(1,ipoin,1) = accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin))
+ buffer_send_faces_vector(2,ipoin,1) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin))
+ buffer_send_faces_vector(3,ipoin,1) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin))
+ enddo
+
! the buffer for the inner core starts right after the buffer for the crust and mantle
ioffset = npoin2D_xi_crust_mantle(1)
- do ipoin = 1,npoin2D_xi_crust_mantle(1)
- buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin))
- enddo
-
do ipoin = 1,npoin2D_xi_inner_core(1)
- buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolleft_xi_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin))
+ buffer_send_faces_vector(1,ioffset + ipoin,1) = accel_inner_core(1,iboolleft_xi_inner_core(ipoin))
+ buffer_send_faces_vector(2,ioffset + ipoin,1) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin))
+ buffer_send_faces_vector(3,ioffset + ipoin,1) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin))
enddo
! send messages backward along each row
@@ -228,51 +255,64 @@
else
receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
endif
- call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+ call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
+ call MPI_ISSEND(buffer_send_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
+
+ iphase = iphase + 1
+ return ! exit because we have started some communications therefore we need some time
+
+ endif !!!!!!!!! end of iphase 2
+
+ if(iphase == 3) then
+
+! call MPI_WAIT(request_send,msg_status,ier)
+! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+
! all slices copy the buffer received to the contributions on the right face
if(iproc_xi < NPROC_XI-1) then
do ipoin = 1,npoin2D_xi_crust_mantle(2)
- accel_crust_mantle(1,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin)
- accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin)
- accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin)
+ accel_crust_mantle(1,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin,1)
+ accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin,1)
+ accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin,1)
enddo
+! the buffer for the inner core starts right after the buffer for the crust and mantle
ioffset = npoin2D_xi_crust_mantle(2)
+
do ipoin = 1,npoin2D_xi_inner_core(2)
- accel_inner_core(1,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin)
- accel_inner_core(2,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin)
- accel_inner_core(3,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin)
+ accel_inner_core(1,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin,1)
+ accel_inner_core(2,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin,1)
+ accel_inner_core(3,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin,1)
enddo
endif
- endif
-
!----
!---- then assemble along eta using the 2-D topology
!----
-! assemble along eta only if more than one slice
- if(NPROC_ETA > 1) then
+! slices copy the right face into the buffer
+ do ipoin = 1,npoin2D_eta_crust_mantle(2)
+ buffer_send_faces_vector(1,ipoin,1) = accel_crust_mantle(1,iboolright_eta_crust_mantle(ipoin))
+ buffer_send_faces_vector(2,ipoin,1) = accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin))
+ buffer_send_faces_vector(3,ipoin,1) = accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin))
+ enddo
! the buffer for the inner core starts right after the buffer for the crust and mantle
ioffset = npoin2D_eta_crust_mantle(2)
-! slices copy the right face into the buffer
- do ipoin = 1,npoin2D_eta_crust_mantle(2)
- buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolright_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin))
- enddo
-
do ipoin = 1,npoin2D_eta_inner_core(2)
- buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolright_eta_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolright_eta_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolright_eta_inner_core(ipoin))
+ buffer_send_faces_vector(1,ioffset + ipoin,1) = accel_inner_core(1,iboolright_eta_inner_core(ipoin))
+ buffer_send_faces_vector(2,ioffset + ipoin,1) = accel_inner_core(2,iboolright_eta_inner_core(ipoin))
+ buffer_send_faces_vector(3,ioffset + ipoin,1) = accel_inner_core(3,iboolright_eta_inner_core(ipoin))
enddo
! send messages forward along each row
@@ -286,30 +326,48 @@
else
receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
endif
- call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+ call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
+ call MPI_ISSEND(buffer_send_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
+
+ iphase = iphase + 1
+ return ! exit because we have started some communications therefore we need some time
+
+ endif !!!!!!!!! end of iphase 3
+
+ if(iphase == 4) then
+
+! call MPI_WAIT(request_send,msg_status,ier)
+! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+
! all slices add the buffer received to the contributions on the left face
if(iproc_eta > 0) then
do ipoin = 1,npoin2D_eta_crust_mantle(1)
accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(1,ipoin)
+ buffer_received_faces_vector(1,ipoin,1)
accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(2,ipoin)
+ buffer_received_faces_vector(2,ipoin,1)
accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin)) + &
- buffer_received_faces_vector(3,ipoin)
+ buffer_received_faces_vector(3,ipoin,1)
enddo
+! the buffer for the inner core starts right after the buffer for the crust and mantle
ioffset = npoin2D_eta_crust_mantle(1)
+
do ipoin = 1,npoin2D_eta_inner_core(1)
accel_inner_core(1,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(1,iboolleft_eta_inner_core(ipoin)) + &
- buffer_received_faces_vector(1,ioffset + ipoin)
+ buffer_received_faces_vector(1,ioffset + ipoin,1)
accel_inner_core(2,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin)) + &
- buffer_received_faces_vector(2,ioffset + ipoin)
+ buffer_received_faces_vector(2,ioffset + ipoin,1)
accel_inner_core(3,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin)) + &
- buffer_received_faces_vector(3,ioffset + ipoin)
+ buffer_received_faces_vector(3,ioffset + ipoin,1)
enddo
endif
@@ -317,19 +375,19 @@
! the contributions are correctly assembled on the left side of each slice
! now we have to send the result back to the sender
! all slices copy the left face into the buffer
+ do ipoin = 1,npoin2D_eta_crust_mantle(1)
+ buffer_send_faces_vector(1,ipoin,1) = accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin))
+ buffer_send_faces_vector(2,ipoin,1) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin))
+ buffer_send_faces_vector(3,ipoin,1) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin))
+ enddo
+
! the buffer for the inner core starts right after the buffer for the crust and mantle
ioffset = npoin2D_eta_crust_mantle(1)
- do ipoin = 1,npoin2D_eta_crust_mantle(1)
- buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin))
- buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin))
- enddo
-
do ipoin = 1,npoin2D_eta_inner_core(1)
- buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolleft_eta_inner_core(ipoin))
- buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin))
- buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin))
+ buffer_send_faces_vector(1,ioffset + ipoin,1) = accel_inner_core(1,iboolleft_eta_inner_core(ipoin))
+ buffer_send_faces_vector(2,ioffset + ipoin,1) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin))
+ buffer_send_faces_vector(3,ioffset + ipoin,1) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin))
enddo
! send messages backward along each row
@@ -343,37 +401,56 @@
else
receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
endif
- call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,receiver, &
- itag2,buffer_received_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+ call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive,ier)
+ call MPI_ISSEND(buffer_send_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,receiver, &
+ itag2,MPI_COMM_WORLD,request_send,ier)
+
+ iphase = iphase + 1
+ return ! exit because we have started some communications therefore we need some time
+
+ endif !!!!!!!!! end of iphase 4
+
+ if(iphase == 5) then
+
+! call MPI_WAIT(request_send,msg_status,ier)
+! call MPI_WAIT(request_receive,msg_status,ier)
+ call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+
! all slices copy the buffer received to the contributions on the right face
if(iproc_eta < NPROC_ETA-1) then
do ipoin = 1,npoin2D_eta_crust_mantle(2)
- accel_crust_mantle(1,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin)
- accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin)
- accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin)
+ accel_crust_mantle(1,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin,1)
+ accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin,1)
+ accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin,1)
enddo
+! the buffer for the inner core starts right after the buffer for the crust and mantle
ioffset = npoin2D_eta_crust_mantle(2)
+
do ipoin = 1,npoin2D_eta_inner_core(2)
- accel_inner_core(1,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin)
- accel_inner_core(2,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin)
- accel_inner_core(3,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin)
+ accel_inner_core(1,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin,1)
+ accel_inner_core(2,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin,1)
+ accel_inner_core(3,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin,1)
enddo
endif
- endif
-
!----
!---- start MPI assembling phase between chunks
!----
! check flag to see if we need to assemble (might be turned off when debugging)
! and do not assemble if only one chunk
- if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) return
+ if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) then
+ iphase = 9999 ! this means everything is finished
+ return
+ endif
! ***************************************************************
! transmit messages in forward direction (iprocfrom -> iprocto)
@@ -382,43 +459,41 @@
!---- put slices in receive mode
!---- a given slice can belong to at most two faces
-! use three step scheme that can never deadlock
-! scheme for faces cannot deadlock even if NPROC_XI = NPROC_ETA = 1
- do imsg_loop = 1,NUM_MSG_TYPES
-
icount_faces = 0
do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. &
- myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
sender = iprocfrom_faces(imsg)
! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_faces_crust_mantle(icount_faces)
+ call MPI_IRECV(buffer_received_faces_vector(:,:,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
- call MPI_RECV(buffer_received_faces_vector,NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+! do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+! accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+! accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(1,ipoin2D,icount_faces)
+! accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+! accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(2,ipoin2D,icount_faces)
+! accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+! accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(3,ipoin2D,icount_faces)
+! enddo
- do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
- accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
- accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(1,ipoin2D)
- accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
- accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(2,ipoin2D)
- accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
- accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(3,ipoin2D)
- enddo
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+! ioffset = npoin2D_faces_crust_mantle(icount_faces)
- do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
- accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
- accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) + buffer_received_faces_vector(1,ioffset + ipoin2D)
- accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
- accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) + buffer_received_faces_vector(2,ioffset + ipoin2D)
- accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
- accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) + buffer_received_faces_vector(3,ioffset + ipoin2D)
- enddo
+! do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+! accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+! accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
+! buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
+! accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+! accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
+! buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
+! accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+! accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
+! buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
+! enddo
endif
enddo
@@ -427,35 +502,90 @@
!---- a given slice can belong to at most two faces
icount_faces = 0
do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. &
- myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
receiver = iprocto_faces(imsg)
! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
+ do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+ buffer_send_faces_vector(1,ipoin2D,icount_faces) = accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ buffer_send_faces_vector(2,ipoin2D,icount_faces) = accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ buffer_send_faces_vector(3,ipoin2D,icount_faces) = accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ enddo
+
! the buffer for the inner core starts right after the buffer for the crust and mantle
ioffset = npoin2D_faces_crust_mantle(icount_faces)
+ do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+ buffer_send_faces_vector(1,ioffset + ipoin2D,icount_faces) = accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces))
+ buffer_send_faces_vector(2,ioffset + ipoin2D,icount_faces) = accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces))
+ buffer_send_faces_vector(3,ioffset + ipoin2D,icount_faces) = accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces))
+ enddo
+
+ call MPI_ISSEND(buffer_send_faces_vector(:,:,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,receiver,itag, &
+ MPI_COMM_WORLD,request_send_array(icount_faces),ier)
+ endif
+ enddo
+
+ iphase = iphase + 1
+ return ! exit because we have started some communications therefore we need some time
+
+ endif !!!!!!!!! end of iphase 5
+
+ if(iphase == 6) then
+
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+ call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+ endif
+ enddo
+
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
+ endif
+ enddo
+
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+
do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
- buffer_send_faces_vector(1,ipoin2D) = accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces))
- buffer_send_faces_vector(2,ipoin2D) = accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces))
- buffer_send_faces_vector(3,ipoin2D) = accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(1,ipoin2D,icount_faces)
+ accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(2,ipoin2D,icount_faces)
+ accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+ accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(3,ipoin2D,icount_faces)
enddo
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_faces_crust_mantle(icount_faces)
+
do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
- buffer_send_faces_vector(1,ioffset + ipoin2D) = accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces))
- buffer_send_faces_vector(2,ioffset + ipoin2D) = accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces))
- buffer_send_faces_vector(3,ioffset + ipoin2D) = accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces))
+ accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
+ buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
+ accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
+ buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
+ accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
+ buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
enddo
- call MPI_SEND(buffer_send_faces_vector,NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
-
endif
enddo
-
! *********************************************************************
! transmit messages back in opposite direction (iprocto -> iprocfrom)
! *********************************************************************
@@ -465,31 +595,33 @@
icount_faces = 0
do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. &
- myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
sender = iprocto_faces(imsg)
! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
-! the buffer for the inner core starts right after the buffer for the crust and mantle
- ioffset = npoin2D_faces_crust_mantle(icount_faces)
+ call MPI_IRECV(buffer_received_faces_vector(:,:,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
+ itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
- call MPI_RECV(buffer_received_faces_vector,NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
- itag,MPI_COMM_WORLD,msg_status,ier)
+! do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+! accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(1,ipoin2D,icount_faces)
+! accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(2,ipoin2D,icount_faces)
+! accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(3,ipoin2D,icount_faces)
+! enddo
- do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
- accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(1,ipoin2D)
- accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(2,ipoin2D)
- accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(3,ipoin2D)
- enddo
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+! ioffset = npoin2D_faces_crust_mantle(icount_faces)
- do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
- accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = buffer_received_faces_vector(1,ioffset + ipoin2D)
- accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = buffer_received_faces_vector(2,ioffset + ipoin2D)
- accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = buffer_received_faces_vector(3,ioffset + ipoin2D)
- enddo
+! do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+! accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+! buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
+! accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+! buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
+! accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+! buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
+! enddo
endif
enddo
@@ -498,38 +630,87 @@
!---- a given slice can belong to at most two faces
icount_faces = 0
do imsg = 1,NUMMSGS_FACES
- if(myrank==iprocfrom_faces(imsg) .or. &
- myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
- if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
receiver = iprocfrom_faces(imsg)
! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
+ do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+ buffer_send_faces_vector(1,ipoin2D,icount_faces) = accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ buffer_send_faces_vector(2,ipoin2D,icount_faces) = accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ buffer_send_faces_vector(3,ipoin2D,icount_faces) = accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+ enddo
+
! the buffer for the inner core starts right after the buffer for the crust and mantle
ioffset = npoin2D_faces_crust_mantle(icount_faces)
- do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
- buffer_send_faces_vector(1,ipoin2D) = accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces))
- buffer_send_faces_vector(2,ipoin2D) = accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces))
- buffer_send_faces_vector(3,ipoin2D) = accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces))
- enddo
-
do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
- buffer_send_faces_vector(1,ioffset + ipoin2D) = accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces))
- buffer_send_faces_vector(2,ioffset + ipoin2D) = accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces))
- buffer_send_faces_vector(3,ioffset + ipoin2D) = accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces))
+ buffer_send_faces_vector(1,ioffset + ipoin2D,icount_faces) = accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces))
+ buffer_send_faces_vector(2,ioffset + ipoin2D,icount_faces) = accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces))
+ buffer_send_faces_vector(3,ioffset + ipoin2D,icount_faces) = accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces))
enddo
- call MPI_SEND(buffer_send_faces_vector,NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+ call MPI_ISSEND(buffer_send_faces_vector(:,:,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,receiver,itag, &
+ MPI_COMM_WORLD,request_send_array(icount_faces),ier)
+ endif
+ enddo
+ iphase = iphase + 1
+ return ! exit because we have started some communications therefore we need some time
+
+ endif !!!!!!!!! end of iphase 6
+
+ if(iphase == 7) then
+
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocto_faces(imsg)) then
+ call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not sent yet
endif
enddo
-! end of anti-deadlocking loop
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
+ if(.not. flag_result_test) return ! exit if message not received yet
+ endif
enddo
+ icount_faces = 0
+ do imsg = 1,NUMMSGS_FACES
+ if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+ if(myrank==iprocfrom_faces(imsg)) then
+ do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+ accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(1,ipoin2D,icount_faces)
+ accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(2,ipoin2D,icount_faces)
+ accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(3,ipoin2D,icount_faces)
+ enddo
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+ ioffset = npoin2D_faces_crust_mantle(icount_faces)
+
+ do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+ accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
+ accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
+ accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+ buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
+ enddo
+ endif
+ enddo
+
+! this is the exit condition, to go beyond the last phase number
+ iphase = iphase + 1
+
+!! DK DK do the rest in blocking for now, for simplicity
+
!----
!---- start MPI assembling corners
!----
@@ -703,5 +884,7 @@
enddo
+ endif !!!!!!!!! end of iphase 7
+
end subroutine assemble_MPI_vector
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_crust_mantle.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_crust_mantle.f90 2011-02-22 01:39:25 UTC (rev 17931)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_crust_mantle.f90 2011-02-22 01:39:52 UTC (rev 17932)
@@ -26,8 +26,26 @@
!=====================================================================
subroutine compute_forces_crust_mantle(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ,accel,xstore,ystore,zstore, &
+ displ_crust_mantle,accel_crust_mantle,xstore,ystore,zstore, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+!----------------------
+ is_on_a_slice_edge_crust_mantle,icall, &
+ accel_inner_core,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_yy,hprime_zz, &
hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
@@ -70,7 +88,7 @@
integer, dimension(NSPEC_CRUST_MANTLE) :: idoubling
! displacement and acceleration
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: displ,accel
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: displ_crust_mantle,accel_crust_mantle
! memory variables for attenuation
! memory variables R_ij are stored at the local rather than global level
@@ -175,14 +193,109 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
+! this for non blocking MPI
+ integer :: iphase,icall
+
+ integer :: computed_elements
+
+ logical, dimension(NSPEC_CRUST_MANTLE) :: is_on_a_slice_edge_crust_mantle
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: accel_inner_core
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
+
+ integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
+
+ integer :: ichunk,iproc_xi,iproc_eta,myrank
+
+ integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
+
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+
+ integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
+ integer npoin2D_faces_inner_core(NUMFACES_SHARED)
+
+ integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ npoin2D_xi_inner_core,npoin2D_eta_inner_core
+
+! communication pattern for faces between chunks
+ integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
+
+! communication pattern for corners between chunks
+ integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+ integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
+ integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
+
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
+
+ integer :: npoin2D_max_all_CM_IC
+ real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC) :: buffer_send_faces,buffer_received_faces
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector
+
+! for matching with central cube in inner core
+ integer nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
+ integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
+ double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices
+ double precision, dimension(npoin2D_cube_from_slices,NDIM,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
+ integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
+ integer receiver_cube_from_slices
+ logical :: INCLUDE_CENTRAL_CUBE
+
+! local to global mapping
+ integer NSPEC2D_BOTTOM_INNER_CORE
+ integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
-! set acceleration to zero
-! accel(:,:) = 0._CUSTOM_REAL
+ computed_elements = 0
do ispec = 1,NSPEC_CRUST_MANTLE
+
+! hide communications by computing the edges first
+ if((icall == 2 .and. is_on_a_slice_edge_crust_mantle(ispec)) .or. &
+ (icall == 1 .and. .not. is_on_a_slice_edge_crust_mantle(ispec))) cycle
+
+! process the communications every ELEMENTS_NONBLOCKING elements
+ computed_elements = computed_elements + 1
+ if (USE_NONBLOCKING_COMMS .and. icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_CM_IC) == 0) then
+
+ if(iphase <= 7) call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector, &
+ NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_CM, &
+ NGLOB1D_RADIAL_IC,NCHUNKS_VAL,iphase)
+
+ if(INCLUDE_CENTRAL_CUBE) then
+ if(iphase > 7 .and. iphase_CC <= 4) &
+ call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
+ ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,accel_inner_core,NDIM,iphase_CC)
+ endif
+
+ endif
+
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -202,25 +315,25 @@
do l=1,NGLLX
hp1 = hprime_xx(i,l)
iglob = ibool(l,j,k,ispec)
- tempx1l = tempx1l + displ(1,iglob)*hp1
- tempy1l = tempy1l + displ(2,iglob)*hp1
- tempz1l = tempz1l + displ(3,iglob)*hp1
+ tempx1l = tempx1l + displ_crust_mantle(1,iglob)*hp1
+ tempy1l = tempy1l + displ_crust_mantle(2,iglob)*hp1
+ tempz1l = tempz1l + displ_crust_mantle(3,iglob)*hp1
!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLY
hp2 = hprime_yy(j,l)
iglob = ibool(i,l,k,ispec)
- tempx2l = tempx2l + displ(1,iglob)*hp2
- tempy2l = tempy2l + displ(2,iglob)*hp2
- tempz2l = tempz2l + displ(3,iglob)*hp2
+ tempx2l = tempx2l + displ_crust_mantle(1,iglob)*hp2
+ tempy2l = tempy2l + displ_crust_mantle(2,iglob)*hp2
+ tempz2l = tempz2l + displ_crust_mantle(3,iglob)*hp2
!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLZ
hp3 = hprime_zz(k,l)
iglob = ibool(i,j,l,ispec)
- tempx3l = tempx3l + displ(1,iglob)*hp3
- tempy3l = tempy3l + displ(2,iglob)*hp3
- tempz3l = tempz3l + displ(3,iglob)*hp3
+ tempx3l = tempx3l + displ_crust_mantle(1,iglob)*hp3
+ tempy3l = tempy3l + displ_crust_mantle(2,iglob)*hp3
+ tempz3l = tempz3l + displ_crust_mantle(3,iglob)*hp3
enddo
! get derivatives of ux, uy and uz with respect to x, y and z
@@ -646,9 +759,9 @@
if(CUSTOM_REAL == SIZE_REAL) then
! get displacement and multiply by density to compute G tensor
- sx_l = rho * dble(displ(1,iglob))
- sy_l = rho * dble(displ(2,iglob))
- sz_l = rho * dble(displ(3,iglob))
+ sx_l = rho * dble(displ_crust_mantle(1,iglob))
+ sy_l = rho * dble(displ_crust_mantle(2,iglob))
+ sz_l = rho * dble(displ_crust_mantle(3,iglob))
! compute G tensor from s . g and add to sigma (not symmetric)
sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
@@ -673,9 +786,9 @@
else
! get displacement and multiply by density to compute G tensor
- sx_l = rho * displ(1,iglob)
- sy_l = rho * displ(2,iglob)
- sz_l = rho * displ(3,iglob)
+ sx_l = rho * displ_crust_mantle(1,iglob)
+ sy_l = rho * displ_crust_mantle(2,iglob)
+ sz_l = rho * displ_crust_mantle(3,iglob)
! compute G tensor from s . g and add to sigma (not symmetric)
sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
@@ -774,9 +887,9 @@
do j=1,NGLLY
do i=1,NGLLX
iglob = ibool(i,j,k,ispec)
- accel(1,iglob) = accel(1,iglob) + sum_terms(1,i,j,k)
- accel(2,iglob) = accel(2,iglob) + sum_terms(2,i,j,k)
- accel(3,iglob) = accel(3,iglob) + sum_terms(3,i,j,k)
+ accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,i,j,k)
+ accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,i,j,k)
+ accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,i,j,k)
enddo
enddo
enddo
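
The scheduling test added at the top of the element loop above is what actually hides the communications: elements on a slice edge are computed in the first call (icall == 1) so that their contributions can be sent early, interior elements are computed in the second call (icall == 2), and during that second call the pending messages are polled every ELEMENTS_NONBLOCKING_CM_IC elements. The same test is added to the other compute_forces_* routines below. Here is a self-contained toy version of that loop, with the MPI progress call replaced by a stub; the element count, the edge pattern and the polling interval are made up for the illustration.

program edge_first_loop_sketch
  implicit none
  integer, parameter :: NSPEC = 20, ELEMENTS_NONBLOCKING = 4
  logical, dimension(NSPEC) :: is_on_a_slice_edge
  integer :: icall, ispec, computed_elements

! mark a few elements as lying on the edge of this slice (hypothetical pattern)
  is_on_a_slice_edge(:) = .false.
  is_on_a_slice_edge(1:NSPEC:5) = .true.

  do icall = 1,2
    computed_elements = 0
    do ispec = 1,NSPEC

! hide communications by computing the edge elements first
      if((icall == 2 .and. is_on_a_slice_edge(ispec)) .or. &
         (icall == 1 .and. .not. is_on_a_slice_edge(ispec))) cycle

! process the communications every ELEMENTS_NONBLOCKING elements, but only during
! the second pass, once the non blocking messages have been posted
      computed_elements = computed_elements + 1
      if(icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING) == 0) call poll_messages()

      call compute_one_element(ispec)
    enddo
  enddo

contains

  subroutine compute_one_element(ispec)
  integer, intent(in) :: ispec
  print *,'computing element ',ispec,' on edge: ',is_on_a_slice_edge(ispec)
  end subroutine compute_one_element

  subroutine poll_messages()
! stand-in for assemble_MPI_vector() / assemble_MPI_central_cube(): in the real solver
! this tests the outstanding requests and advances iphase / iphase_CC without blocking
  print *,'   ...making progress on the non blocking messages...'
  end subroutine poll_messages

end program edge_first_loop_sketch
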
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_crust_mantle_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_crust_mantle_Dev.f90 2011-02-22 01:39:25 UTC (rev 17931)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_crust_mantle_Dev.f90 2011-02-22 01:39:52 UTC (rev 17932)
@@ -25,10 +25,27 @@
!
!=====================================================================
-
subroutine compute_forces_crust_mantle_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ,accel,xstore,ystore,zstore, &
+ displ_crust_mantle,accel_crust_mantle,xstore,ystore,zstore, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+!----------------------
+ is_on_a_slice_edge_crust_mantle,icall, &
+ accel_inner_core,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_xxT, &
hprimewgll_xx,hprimewgll_xxT, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
@@ -39,9 +56,8 @@
ibool,idoubling,R_memory,epsilondev,epsilon_trace_over_3,one_minus_sum_beta, &
alphaval,betaval,gammaval,factor_common,vx,vy,vz,vnspec)
+! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
-! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5
-
implicit none
include "constants.h"
@@ -51,7 +67,7 @@
include "OUTPUT_FILES/values_from_mesher.h"
! displacement and acceleration
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: displ,accel
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: displ_crust_mantle,accel_crust_mantle
! arrays with mesh parameters per slice
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool
@@ -182,22 +198,117 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
- !real(kind=CUSTOM_REAL), dimension(5) :: dummy
-
integer :: i_SLS,i_memory,imodulo_N_SLS
integer :: ispec,ispec_strain
integer :: i,j,k
integer :: int_radius
integer :: iglob1,iglob2,iglob3,iglob4,iglob5
+! this for non blocking MPI
+ integer :: iphase,icall
+
+ integer :: computed_elements
+
+ logical, dimension(NSPEC_CRUST_MANTLE) :: is_on_a_slice_edge_crust_mantle
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: accel_inner_core
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
+
+ integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
+
+ integer :: ichunk,iproc_xi,iproc_eta,myrank
+
+ integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
+
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+
+ integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
+ integer npoin2D_faces_inner_core(NUMFACES_SHARED)
+
+ integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ npoin2D_xi_inner_core,npoin2D_eta_inner_core
+
+! communication pattern for faces between chunks
+ integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
+
+! communication pattern for corners between chunks
+ integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+ integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
+ integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
+
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
+
+ integer :: npoin2D_max_all_CM_IC
+ real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC) :: buffer_send_faces,buffer_received_faces
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector
+
+! for matching with central cube in inner core
+ integer nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
+ integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
+ double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices
+ double precision, dimension(npoin2D_cube_from_slices,NDIM,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
+ integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
+ integer receiver_cube_from_slices
+ logical :: INCLUDE_CENTRAL_CUBE
+
+! local to global mapping
+ integer NSPEC2D_BOTTOM_INNER_CORE
+ integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
imodulo_N_SLS = mod(N_SLS,3)
+ computed_elements = 0
+
do ispec = 1,NSPEC_CRUST_MANTLE
+! hide communications by computing the edges first
+ if((icall == 2 .and. is_on_a_slice_edge_crust_mantle(ispec)) .or. &
+ (icall == 1 .and. .not. is_on_a_slice_edge_crust_mantle(ispec))) cycle
+
+! process the communications every ELEMENTS_NONBLOCKING elements
+ computed_elements = computed_elements + 1
+ if (USE_NONBLOCKING_COMMS .and. icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_CM_IC) == 0) then
+
+ if(iphase <= 7) call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector, &
+ NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_CM, &
+ NGLOB1D_RADIAL_IC,NCHUNKS_VAL,iphase)
+
+ if(INCLUDE_CENTRAL_CUBE) then
+ if(iphase > 7 .and. iphase_CC <= 4) &
+ call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
+ ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,accel_inner_core,NDIM,iphase_CC)
+ endif
+
+ endif
+
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
@@ -207,9 +318,9 @@
! way 1:
! do i=1,NGLLX
! iglob = ibool(i,j,k,ispec)
-! dummyx_loc(i,j,k) = displ(1,iglob)
-! dummyy_loc(i,j,k) = displ(2,iglob)
-! dummyz_loc(i,j,k) = displ(3,iglob)
+! dummyx_loc(i,j,k) = displ_crust_mantle(1,iglob)
+! dummyy_loc(i,j,k) = displ_crust_mantle(2,iglob)
+! dummyz_loc(i,j,k) = displ_crust_mantle(3,iglob)
! enddo
! way 2:
@@ -220,25 +331,25 @@
iglob4 = ibool(4,j,k,ispec)
iglob5 = ibool(5,j,k,ispec)
- dummyx_loc(1,j,k) = displ(1,iglob1)
- dummyy_loc(1,j,k) = displ(2,iglob1)
- dummyz_loc(1,j,k) = displ(3,iglob1)
+ dummyx_loc(1,j,k) = displ_crust_mantle(1,iglob1)
+ dummyy_loc(1,j,k) = displ_crust_mantle(2,iglob1)
+ dummyz_loc(1,j,k) = displ_crust_mantle(3,iglob1)
- dummyx_loc(2,j,k) = displ(1,iglob2)
- dummyy_loc(2,j,k) = displ(2,iglob2)
- dummyz_loc(2,j,k) = displ(3,iglob2)
+ dummyx_loc(2,j,k) = displ_crust_mantle(1,iglob2)
+ dummyy_loc(2,j,k) = displ_crust_mantle(2,iglob2)
+ dummyz_loc(2,j,k) = displ_crust_mantle(3,iglob2)
- dummyx_loc(3,j,k) = displ(1,iglob3)
- dummyy_loc(3,j,k) = displ(2,iglob3)
- dummyz_loc(3,j,k) = displ(3,iglob3)
+ dummyx_loc(3,j,k) = displ_crust_mantle(1,iglob3)
+ dummyy_loc(3,j,k) = displ_crust_mantle(2,iglob3)
+ dummyz_loc(3,j,k) = displ_crust_mantle(3,iglob3)
- dummyx_loc(4,j,k) = displ(1,iglob4)
- dummyy_loc(4,j,k) = displ(2,iglob4)
- dummyz_loc(4,j,k) = displ(3,iglob4)
+ dummyx_loc(4,j,k) = displ_crust_mantle(1,iglob4)
+ dummyy_loc(4,j,k) = displ_crust_mantle(2,iglob4)
+ dummyz_loc(4,j,k) = displ_crust_mantle(3,iglob4)
- dummyx_loc(5,j,k) = displ(1,iglob5)
- dummyy_loc(5,j,k) = displ(2,iglob5)
- dummyz_loc(5,j,k) = displ(3,iglob5)
+ dummyx_loc(5,j,k) = displ_crust_mantle(1,iglob5)
+ dummyy_loc(5,j,k) = displ_crust_mantle(2,iglob5)
+ dummyz_loc(5,j,k) = displ_crust_mantle(3,iglob5)
enddo
enddo
@@ -792,9 +903,9 @@
if(CUSTOM_REAL == SIZE_REAL) then
! get displacement and multiply by density to compute G tensor
- sx_l = rho * dble(displ(1,iglob1))
- sy_l = rho * dble(displ(2,iglob1))
- sz_l = rho * dble(displ(3,iglob1))
+ sx_l = rho * dble(displ_crust_mantle(1,iglob1))
+ sy_l = rho * dble(displ_crust_mantle(2,iglob1))
+ sz_l = rho * dble(displ_crust_mantle(3,iglob1))
! compute G tensor from s . g and add to sigma (not symmetric)
sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
@@ -819,9 +930,9 @@
else
! get displacement and multiply by density to compute G tensor
- sx_l = rho * displ(1,iglob1)
- sy_l = rho * displ(2,iglob1)
- sz_l = rho * displ(3,iglob1)
+ sx_l = rho * displ_crust_mantle(1,iglob1)
+ sy_l = rho * displ_crust_mantle(2,iglob1)
+ sz_l = rho * displ_crust_mantle(3,iglob1)
! compute G tensor from s . g and add to sigma (not symmetric)
sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
@@ -961,15 +1072,15 @@
! way 1:
! do i=1,NGLLX
! iglob = ibool(i,j,k,ispec)
-! accel(:,iglob) = accel(:,iglob) + sum_terms(:,i,j,k)
+! accel_crust_mantle(:,iglob) = accel_crust_mantle(:,iglob) + sum_terms(:,i,j,k)
! enddo
! way 2:
- accel(:,ibool(1,j,k,ispec)) = accel(:,ibool(1,j,k,ispec)) + sum_terms(:,1,j,k)
- accel(:,ibool(2,j,k,ispec)) = accel(:,ibool(2,j,k,ispec)) + sum_terms(:,2,j,k)
- accel(:,ibool(3,j,k,ispec)) = accel(:,ibool(3,j,k,ispec)) + sum_terms(:,3,j,k)
- accel(:,ibool(4,j,k,ispec)) = accel(:,ibool(4,j,k,ispec)) + sum_terms(:,4,j,k)
- accel(:,ibool(5,j,k,ispec)) = accel(:,ibool(5,j,k,ispec)) + sum_terms(:,5,j,k)
+ accel_crust_mantle(:,ibool(1,j,k,ispec)) = accel_crust_mantle(:,ibool(1,j,k,ispec)) + sum_terms(:,1,j,k)
+ accel_crust_mantle(:,ibool(2,j,k,ispec)) = accel_crust_mantle(:,ibool(2,j,k,ispec)) + sum_terms(:,2,j,k)
+ accel_crust_mantle(:,ibool(3,j,k,ispec)) = accel_crust_mantle(:,ibool(3,j,k,ispec)) + sum_terms(:,3,j,k)
+ accel_crust_mantle(:,ibool(4,j,k,ispec)) = accel_crust_mantle(:,ibool(4,j,k,ispec)) + sum_terms(:,4,j,k)
+ accel_crust_mantle(:,ibool(5,j,k,ispec)) = accel_crust_mantle(:,ibool(5,j,k,ispec)) + sum_terms(:,5,j,k)
enddo
enddo
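
A word about the "way 1" / "way 2" comments kept in the Dev routine above: NGLLX, NGLLY and NGLLZ are hard-wired to 5 in these routines, so the innermost gather and update loops can be unrolled by hand into five explicit statements, which is what the renamed assignments do. A minimal stand-alone check that the two forms give the same result; the local-to-global numbering and the field values are invented for the example.

program unrolled_gather_sketch
  implicit none
  integer, parameter :: NGLLX = 5, NGLLY = 5, NGLLZ = 5, NGLOB = NGLLX*NGLLY*NGLLZ
  integer, dimension(NGLLX,NGLLY,NGLLZ) :: ibool
  real, dimension(NGLOB) :: displ
  real, dimension(NGLLX,NGLLY,NGLLZ) :: loc_way1, loc_way2
  integer :: i,j,k,iglob

! hypothetical local-to-global numbering and a hypothetical displacement field
  iglob = 0
  do k = 1,NGLLZ
    do j = 1,NGLLY
      do i = 1,NGLLX
        iglob = iglob + 1
        ibool(i,j,k) = iglob
        displ(iglob) = real(iglob)
      enddo
    enddo
  enddo

  do k = 1,NGLLZ
    do j = 1,NGLLY
! way 1: generic loop over the NGLLX Gauss-Lobatto-Legendre points
      do i = 1,NGLLX
        loc_way1(i,j,k) = displ(ibool(i,j,k))
      enddo
! way 2: unrolled by hand because NGLLX is known to be 5
      loc_way2(1,j,k) = displ(ibool(1,j,k))
      loc_way2(2,j,k) = displ(ibool(2,j,k))
      loc_way2(3,j,k) = displ(ibool(3,j,k))
      loc_way2(4,j,k) = displ(ibool(4,j,k))
      loc_way2(5,j,k) = displ(ibool(5,j,k))
    enddo
  enddo

  print *,'both ways agree: ',all(loc_way1 == loc_way2)
end program unrolled_gather_sketch
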
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_inner_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_inner_core.f90 2011-02-22 01:39:25 UTC (rev 17931)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_inner_core.f90 2011-02-22 01:39:52 UTC (rev 17932)
@@ -26,8 +26,26 @@
!=====================================================================
subroutine compute_forces_inner_core(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ,accel,xstore,ystore,zstore, &
+ displ_inner_core,accel_inner_core,xstore,ystore,zstore, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+!----------------------
+ is_on_a_slice_edge_inner_core,icall, &
+ accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_yy,hprime_zz, &
hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
@@ -45,7 +63,7 @@
include "OUTPUT_FILES/values_from_mesher.h"
! displacement and acceleration
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: displ,accel
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: displ_inner_core,accel_inner_core
! for attenuation
! memory variables R_ij are stored at the local rather than global level
@@ -131,15 +149,112 @@
double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: xstore,ystore,zstore
+! this for non blocking MPI
+ integer :: iphase,icall
+
+ integer :: computed_elements
+
+ logical, dimension(NSPEC_INNER_CORE) :: is_on_a_slice_edge_inner_core
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: accel_crust_mantle
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
+
+ integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
+
+ integer :: ichunk,iproc_xi,iproc_eta,myrank
+
+ integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
+
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+
+ integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
+ integer npoin2D_faces_inner_core(NUMFACES_SHARED)
+
+ integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ npoin2D_xi_inner_core,npoin2D_eta_inner_core
+
+! communication pattern for faces between chunks
+ integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
+
+! communication pattern for corners between chunks
+ integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+ integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
+ integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
+
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
+
+ integer :: npoin2D_max_all_CM_IC
+ real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC) :: buffer_send_faces,buffer_received_faces
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector
+
+! for matching with central cube in inner core
+ integer nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
+ integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
+ double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices
+ double precision, dimension(npoin2D_cube_from_slices,NDIM,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
+ integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
+ integer receiver_cube_from_slices
+ logical :: INCLUDE_CENTRAL_CUBE
+
+! local to global mapping
+ integer NSPEC2D_BOTTOM_INNER_CORE
+ integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
+ computed_elements = 0
+
do ispec = 1,NSPEC_INNER_CORE
+! hide communications by computing the edges first
+ if((icall == 2 .and. is_on_a_slice_edge_inner_core(ispec)) .or. &
+ (icall == 1 .and. .not. is_on_a_slice_edge_inner_core(ispec))) cycle
+
! exclude fictitious elements in central cube
if(idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then
+! process the communications every ELEMENTS_NONBLOCKING elements
+ computed_elements = computed_elements + 1
+ if (USE_NONBLOCKING_COMMS .and. icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_CM_IC) == 0) then
+
+ if(iphase <= 7) call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector, &
+ NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_CM, &
+ NGLOB1D_RADIAL_IC,NCHUNKS_VAL,iphase)
+
+ if(INCLUDE_CENTRAL_CUBE) then
+ if(iphase > 7 .and. iphase_CC <= 4) &
+ call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
+ ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,accel_inner_core,NDIM,iphase_CC)
+ endif
+
+ endif
+
do k=1,NGLLZ
do j=1,NGLLY
do i=1,NGLLX
@@ -159,25 +274,25 @@
do l=1,NGLLX
hp1 = hprime_xx(i,l)
iglob = ibool(l,j,k,ispec)
- tempx1l = tempx1l + displ(1,iglob)*hp1
- tempy1l = tempy1l + displ(2,iglob)*hp1
- tempz1l = tempz1l + displ(3,iglob)*hp1
+ tempx1l = tempx1l + displ_inner_core(1,iglob)*hp1
+ tempy1l = tempy1l + displ_inner_core(2,iglob)*hp1
+ tempz1l = tempz1l + displ_inner_core(3,iglob)*hp1
!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLY
hp2 = hprime_yy(j,l)
iglob = ibool(i,l,k,ispec)
- tempx2l = tempx2l + displ(1,iglob)*hp2
- tempy2l = tempy2l + displ(2,iglob)*hp2
- tempz2l = tempz2l + displ(3,iglob)*hp2
+ tempx2l = tempx2l + displ_inner_core(1,iglob)*hp2
+ tempy2l = tempy2l + displ_inner_core(2,iglob)*hp2
+ tempz2l = tempz2l + displ_inner_core(3,iglob)*hp2
!!! can merge these loops because NGLLX = NGLLY = NGLLZ enddo
!!! can merge these loops because NGLLX = NGLLY = NGLLZ do l=1,NGLLZ
hp3 = hprime_zz(k,l)
iglob = ibool(i,j,l,ispec)
- tempx3l = tempx3l + displ(1,iglob)*hp3
- tempy3l = tempy3l + displ(2,iglob)*hp3
- tempz3l = tempz3l + displ(3,iglob)*hp3
+ tempx3l = tempx3l + displ_inner_core(1,iglob)*hp3
+ tempy3l = tempy3l + displ_inner_core(2,iglob)*hp3
+ tempz3l = tempz3l + displ_inner_core(3,iglob)*hp3
enddo
! get derivatives of ux, uy and uz with respect to x, y and z
@@ -381,9 +496,9 @@
if(CUSTOM_REAL == SIZE_REAL) then
! get displacement and multiply by density to compute G tensor
- sx_l = rho * dble(displ(1,iglob))
- sy_l = rho * dble(displ(2,iglob))
- sz_l = rho * dble(displ(3,iglob))
+ sx_l = rho * dble(displ_inner_core(1,iglob))
+ sy_l = rho * dble(displ_inner_core(2,iglob))
+ sz_l = rho * dble(displ_inner_core(3,iglob))
! compute G tensor from s . g and add to sigma (not symmetric)
sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
@@ -408,9 +523,9 @@
else
! get displacement and multiply by density to compute G tensor
- sx_l = rho * displ(1,iglob)
- sy_l = rho * displ(2,iglob)
- sz_l = rho * displ(3,iglob)
+ sx_l = rho * displ_inner_core(1,iglob)
+ sy_l = rho * displ_inner_core(2,iglob)
+ sz_l = rho * displ_inner_core(3,iglob)
! compute G tensor from s . g and add to sigma (not symmetric)
sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
@@ -510,7 +625,7 @@
do j=1,NGLLY
do i=1,NGLLX
iglob = ibool(i,j,k,ispec)
- accel(:,iglob) = accel(:,iglob) + sum_terms(:,i,j,k)
+ accel_inner_core(:,iglob) = accel_inner_core(:,iglob) + sum_terms(:,i,j,k)
enddo
enddo
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_inner_core_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_inner_core_Dev.f90 2011-02-22 01:39:25 UTC (rev 17931)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_inner_core_Dev.f90 2011-02-22 01:39:52 UTC (rev 17932)
@@ -25,10 +25,27 @@
!
!=====================================================================
-
subroutine compute_forces_inner_core_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
- displ,accel,xstore,ystore,zstore, &
+ displ_inner_core,accel_inner_core,xstore,ystore,zstore, &
xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+!----------------------
+ is_on_a_slice_edge_inner_core,icall, &
+ accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
kappavstore,muvstore,ibool,idoubling, &
@@ -36,6 +53,8 @@
one_minus_sum_beta,alphaval,betaval,gammaval,factor_common, &
vx,vy,vz,vnspec)
+! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
+
implicit none
include "constants.h"
@@ -45,7 +64,7 @@
include "OUTPUT_FILES/values_from_mesher.h"
! displacement and acceleration
- real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: displ,accel
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: displ_inner_core,accel_inner_core
integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool
real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: xstore,ystore,zstore
@@ -127,7 +146,6 @@
equivalence(newtempy3,E2_mxm_m2_m1_5points)
equivalence(newtempz3,E3_mxm_m2_m1_5points)
-
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
real(kind=CUSTOM_REAL) R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
@@ -157,26 +175,120 @@
real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
- !real(kind=CUSTOM_REAL), dimension(5) :: dummy
-
integer :: int_radius
integer :: ispec,ispec_strain
integer :: i,j,k !,l
integer :: i_SLS,i_memory,imodulo_N_SLS
integer :: iglob1,iglob2,iglob3,iglob4,iglob5
+! this for non blocking MPI
+ integer :: iphase,icall
+ integer :: computed_elements
+
+ logical, dimension(NSPEC_INNER_CORE) :: is_on_a_slice_edge_inner_core
+
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: accel_crust_mantle
+
+ integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
+
+ integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
+
+ integer :: ichunk,iproc_xi,iproc_eta,myrank
+
+ integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
+
+ integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
+ integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+
+ integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
+ integer npoin2D_faces_inner_core(NUMFACES_SHARED)
+
+ integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ npoin2D_xi_inner_core,npoin2D_eta_inner_core
+
+! communication pattern for faces between chunks
+ integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
+
+! communication pattern for corners between chunks
+ integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+ integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
+ integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
+
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
+ integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
+
+ integer :: npoin2D_max_all_CM_IC
+ real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC) :: buffer_send_faces,buffer_received_faces
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+ real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector
+
+! for matching with central cube in inner core
+ integer nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
+ integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
+ double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices
+ double precision, dimension(npoin2D_cube_from_slices,NDIM,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
+ integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
+ integer receiver_cube_from_slices
+ logical :: INCLUDE_CENTRAL_CUBE
+
+! local to global mapping
+ integer NSPEC2D_BOTTOM_INNER_CORE
+ integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+
! ****************************************************
! big loop over all spectral elements in the solid
! ****************************************************
imodulo_N_SLS = mod(N_SLS,3)
+ computed_elements = 0
+
do ispec = 1,NSPEC_INNER_CORE
+! hide communications by computing the edges first
+ if((icall == 2 .and. is_on_a_slice_edge_inner_core(ispec)) .or. &
+ (icall == 1 .and. .not. is_on_a_slice_edge_inner_core(ispec))) cycle
+
! exclude fictitious elements in central cube
if(idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then
+! process the communications every ELEMENTS_NONBLOCKING elements
+ computed_elements = computed_elements + 1
+ if (USE_NONBLOCKING_COMMS .and. icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_CM_IC) == 0) then
+
+ if(iphase <= 7) call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector, &
+ NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_CM, &
+ NGLOB1D_RADIAL_IC,NCHUNKS_VAL,iphase)
+
+ if(INCLUDE_CENTRAL_CUBE) then
+ if(iphase > 7 .and. iphase_CC <= 4) &
+ call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
+ ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,accel_inner_core,NDIM,iphase_CC)
+ endif
+
+ endif
+
! subroutines adapted from Deville, Fischer and Mund, High-order methods
! for incompressible fluid flow, Cambridge University Press (2002),
! pages 386 and 389 and Figure 8.3.1
@@ -185,9 +297,9 @@
! way 1:
! do i=1,NGLLX
! iglob = ibool(i,j,k,ispec)
-! dummyx_loc(i,j,k) = displ(1,iglob)
-! dummyy_loc(i,j,k) = displ(2,iglob)
-! dummyz_loc(i,j,k) = displ(3,iglob)
+! dummyx_loc(i,j,k) = displ_inner_core(1,iglob)
+! dummyy_loc(i,j,k) = displ_inner_core(2,iglob)
+! dummyz_loc(i,j,k) = displ_inner_core(3,iglob)
! enddo
! way 2:
@@ -198,25 +310,25 @@
iglob4 = ibool(4,j,k,ispec)
iglob5 = ibool(5,j,k,ispec)
- dummyx_loc(1,j,k) = displ(1,iglob1)
- dummyy_loc(1,j,k) = displ(2,iglob1)
- dummyz_loc(1,j,k) = displ(3,iglob1)
+ dummyx_loc(1,j,k) = displ_inner_core(1,iglob1)
+ dummyy_loc(1,j,k) = displ_inner_core(2,iglob1)
+ dummyz_loc(1,j,k) = displ_inner_core(3,iglob1)
- dummyx_loc(2,j,k) = displ(1,iglob2)
- dummyy_loc(2,j,k) = displ(2,iglob2)
- dummyz_loc(2,j,k) = displ(3,iglob2)
+ dummyx_loc(2,j,k) = displ_inner_core(1,iglob2)
+ dummyy_loc(2,j,k) = displ_inner_core(2,iglob2)
+ dummyz_loc(2,j,k) = displ_inner_core(3,iglob2)
- dummyx_loc(3,j,k) = displ(1,iglob3)
- dummyy_loc(3,j,k) = displ(2,iglob3)
- dummyz_loc(3,j,k) = displ(3,iglob3)
+ dummyx_loc(3,j,k) = displ_inner_core(1,iglob3)
+ dummyy_loc(3,j,k) = displ_inner_core(2,iglob3)
+ dummyz_loc(3,j,k) = displ_inner_core(3,iglob3)
- dummyx_loc(4,j,k) = displ(1,iglob4)
- dummyy_loc(4,j,k) = displ(2,iglob4)
- dummyz_loc(4,j,k) = displ(3,iglob4)
+ dummyx_loc(4,j,k) = displ_inner_core(1,iglob4)
+ dummyy_loc(4,j,k) = displ_inner_core(2,iglob4)
+ dummyz_loc(4,j,k) = displ_inner_core(3,iglob4)
- dummyx_loc(5,j,k) = displ(1,iglob5)
- dummyy_loc(5,j,k) = displ(2,iglob5)
- dummyz_loc(5,j,k) = displ(3,iglob5)
+ dummyx_loc(5,j,k) = displ_inner_core(1,iglob5)
+ dummyy_loc(5,j,k) = displ_inner_core(2,iglob5)
+ dummyz_loc(5,j,k) = displ_inner_core(3,iglob5)
enddo
@@ -539,9 +651,9 @@
! distinguish between single and double precision for reals
if(CUSTOM_REAL == SIZE_REAL) then
! get displacement and multiply by density to compute G tensor
- sx_l = rho * dble(displ(1,iglob1))
- sy_l = rho * dble(displ(2,iglob1))
- sz_l = rho * dble(displ(3,iglob1))
+ sx_l = rho * dble(displ_inner_core(1,iglob1))
+ sy_l = rho * dble(displ_inner_core(2,iglob1))
+ sz_l = rho * dble(displ_inner_core(3,iglob1))
! compute G tensor from s . g and add to sigma (not symmetric)
sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
@@ -566,9 +678,9 @@
else
! get displacement and multiply by density to compute G tensor
- sx_l = rho * displ(1,iglob1)
- sy_l = rho * displ(2,iglob1)
- sz_l = rho * displ(3,iglob1)
+ sx_l = rho * displ_inner_core(1,iglob1)
+ sy_l = rho * displ_inner_core(2,iglob1)
+ sz_l = rho * displ_inner_core(3,iglob1)
! compute G tensor from s . g and add to sigma (not symmetric)
sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
@@ -705,15 +817,15 @@
! way 1:
! do i=1,NGLLX
! iglob = ibool(i,j,k,ispec)
-! accel(:,iglob) = accel(:,iglob) + sum_terms(:,i,j,k)
+! accel_inner_core(:,iglob) = accel_inner_core(:,iglob) + sum_terms(:,i,j,k)
! enddo
! way 2:
- accel(:,ibool(1,j,k,ispec)) = accel(:,ibool(1,j,k,ispec)) + sum_terms(:,1,j,k)
- accel(:,ibool(2,j,k,ispec)) = accel(:,ibool(2,j,k,ispec)) + sum_terms(:,2,j,k)
- accel(:,ibool(3,j,k,ispec)) = accel(:,ibool(3,j,k,ispec)) + sum_terms(:,3,j,k)
- accel(:,ibool(4,j,k,ispec)) = accel(:,ibool(4,j,k,ispec)) + sum_terms(:,4,j,k)
- accel(:,ibool(5,j,k,ispec)) = accel(:,ibool(5,j,k,ispec)) + sum_terms(:,5,j,k)
+ accel_inner_core(:,ibool(1,j,k,ispec)) = accel_inner_core(:,ibool(1,j,k,ispec)) + sum_terms(:,1,j,k)
+ accel_inner_core(:,ibool(2,j,k,ispec)) = accel_inner_core(:,ibool(2,j,k,ispec)) + sum_terms(:,2,j,k)
+ accel_inner_core(:,ibool(3,j,k,ispec)) = accel_inner_core(:,ibool(3,j,k,ispec)) + sum_terms(:,3,j,k)
+ accel_inner_core(:,ibool(4,j,k,ispec)) = accel_inner_core(:,ibool(4,j,k,ispec)) + sum_terms(:,4,j,k)
+ accel_inner_core(:,ibool(5,j,k,ispec)) = accel_inner_core(:,ibool(5,j,k,ispec)) + sum_terms(:,5,j,k)
enddo
enddo
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_outer_core_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_outer_core_Dev.f90 2011-02-22 01:39:25 UTC (rev 17931)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/compute_forces_outer_core_Dev.f90 2011-02-22 01:39:52 UTC (rev 17932)
@@ -46,6 +46,8 @@
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
ibool,MOVIE_VOLUME)
+! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
+
implicit none
include "constants.h"
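
The "Deville et al. (2002) inlined matrix-matrix products" referred to in the comments added to the _Dev routines reduce the derivative computations to small 5 x N matrix products in which the sum over the 5 GLL points is written out explicitly. A stand-alone sketch of that kernel, checked against the matmul intrinsic; the matrix contents are random numbers, purely for illustration.

program deville_mxm_sketch
  implicit none
  integer, parameter :: m1 = 5, m2 = 25   ! 5 GLL points, 5*5 = 25 columns
  real, dimension(m1,m1) :: hprime
  real, dimension(m1,m2) :: b, c_matmul, c_inlined
  integer :: i, j

  call random_number(hprime)
  call random_number(b)

! reference result with the intrinsic
  c_matmul = matmul(hprime, b)

! inlined version: the sum over the 5 GLL points is written out by hand
  do j = 1,m2
    do i = 1,m1
      c_inlined(i,j) = hprime(i,1)*b(1,j) + hprime(i,2)*b(2,j) + hprime(i,3)*b(3,j) + &
                       hprime(i,4)*b(4,j) + hprime(i,5)*b(5,j)
    enddo
  enddo

  print *,'max difference between matmul and inlined product: ',maxval(abs(c_matmul - c_inlined))
end program deville_mxm_sketch
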
Modified: seismo/3D/SPECFEM3D_GLOBE/trunk/specfem3D.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/specfem3D.f90 2011-02-22 01:39:25 UTC (rev 17931)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/specfem3D.f90 2011-02-22 01:39:52 UTC (rev 17932)
@@ -511,7 +511,9 @@
real :: percentage_edge
! assembling phase number for non blocking MPI
- integer :: iphase,icall
+! iphase is for the crust_mantle, outer_core and inner_core regions
+! iphase_CC is for the central cube
+ integer :: iphase,iphase_CC,icall
! -------- arrays specific to each region here -----------
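
Roughly, the three flags declared above are used as follows in one time step of the forward run. This is only a serial outline of the intent, not the literal specfem3D.f90 control flow; in particular the exact point where iphase and iphase_CC are set back to 1 is not visible in the hunks below, so it is assumed here.

program time_step_flags_sketch
  implicit none
  integer :: iphase, iphase_CC, icall

! first pass over the elements: only those on a slice edge, no communications yet
  iphase = 0     ! do not start any non blocking communications at this stage
  iphase_CC = 0  ! same for the messages of the central cube
  icall = 1
  call compute_forces(icall)

! the non blocking assembly is then started and the interior elements are computed
! while the messages are in flight (setting the phases to 1 here is an assumption
! made for this outline)
  iphase = 1
  iphase_CC = 1
  icall = 2
  call compute_forces(icall)

! finish whatever has not completed during the second pass
  do while(iphase <= 7 .or. iphase_CC <= 4)
    call poll_assembly()
  enddo
  print *,'time step done, all contributions assembled'

contains

  subroutine compute_forces(icall)
  integer, intent(in) :: icall
  print *,'element loop, pass ',icall
! in the real routines this polling is done every ELEMENTS_NONBLOCKING_CM_IC elements
  if(icall == 2) call poll_assembly()
  end subroutine compute_forces

  subroutine poll_assembly()
! stand-in for assemble_MPI_vector and assemble_MPI_central_cube: each call makes progress
! on at most one phase and never blocks (7 and 4 are the last phase numbers used in the
! tests shown in the routines above)
  if(iphase <= 7) then
    iphase = iphase + 1
  else if(iphase_CC <= 4) then
    iphase_CC = iphase_CC + 1
  endif
  end subroutine poll_assembly

end program time_step_flags_sketch
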
@@ -2128,8 +2130,9 @@
time = (dble(it-1)*DT-t0)*scale_t_inv
endif
- iphase = 0 ! do not start any non blocking communications at this stage
- icall = 1 ! compute all the outer elements first in the case of non blocking MPI
+ iphase = 0 ! do not start any non blocking communications at this stage
+ icall = 1 ! compute all the outer elements first in the case of non blocking MPI
+
if( USE_DEVILLE_PRODUCTS_VAL ) then
! uses Deville et al. (2002) routine
call compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
@@ -2307,7 +2310,8 @@
NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE), &
NGLOB2DMAX_XY,NCHUNKS_VAL,iphase)
- icall = 2 ! compute all the inner elements in the case of non blocking MPI
+ icall = 2 ! now compute all the inner elements in the case of non blocking MPI
+
if( USE_DEVILLE_PRODUCTS_VAL ) then
! uses Deville et al. (2002) routine
call compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
@@ -2462,6 +2466,10 @@
! for anisotropy and gravity, x y and z contain r theta and phi
+ iphase = 0 ! do not start any non blocking communications at this stage
+ iphase_CC = 0 ! do not start any non blocking communications at this stage
+ icall = 1 ! compute all the outer elements first in the case of non blocking MPI
+
if( USE_DEVILLE_PRODUCTS_VAL ) then
call compute_forces_crust_mantle_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
displ_crust_mantle,accel_crust_mantle, &
@@ -2469,6 +2477,24 @@
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+!----------------------
+ is_on_a_slice_edge_crust_mantle,icall, &
+ accel_inner_core,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_xxT, &
hprimewgll_xx,hprimewgll_xxT, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
@@ -2494,6 +2520,24 @@
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+!----------------------
+ is_on_a_slice_edge_crust_mantle,icall, &
+ accel_inner_core,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_yy,hprime_zz, &
hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
@@ -2523,6 +2567,24 @@
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+!----------------------
+ is_on_a_slice_edge_crust_mantle,icall, &
+ accel_inner_core,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_xxT, &
hprimewgll_xx,hprimewgll_xxT, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
@@ -2548,6 +2610,24 @@
xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+!----------------------
+ is_on_a_slice_edge_crust_mantle,icall, &
+ accel_inner_core,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_yy,hprime_zz, &
hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
@@ -2570,32 +2650,6 @@
endif
endif
- ! Stacey
- if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
- call compute_stacey_crust_mantle(ichunk,SIMULATION_TYPE, &
- NSTEP,it,SAVE_FORWARD,ibool_crust_mantle, &
- veloc_crust_mantle,accel_crust_mantle,b_accel_crust_mantle, &
- jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle, &
- jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle, &
- wgllwgll_xz,wgllwgll_yz, &
- normal_xmin_crust_mantle,normal_xmax_crust_mantle, &
- normal_ymin_crust_mantle,normal_ymax_crust_mantle, &
- rho_vp_crust_mantle,rho_vs_crust_mantle, &
- ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle, &
- ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle, &
- nimin_crust_mantle,nimax_crust_mantle, &
- njmin_crust_mantle,njmax_crust_mantle, &
- nkmin_xi_crust_mantle,nkmin_eta_crust_mantle, &
- nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
- nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
- reclen_xmin_crust_mantle,reclen_xmax_crust_mantle, &
- reclen_ymin_crust_mantle,reclen_ymax_crust_mantle, &
- nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm, &
- absorb_xmin_crust_mantle5,absorb_xmax_crust_mantle5, &
- absorb_ymin_crust_mantle5,absorb_ymax_crust_mantle5)
- endif ! Stacey conditions
-
-
! Deville routine
if( USE_DEVILLE_PRODUCTS_VAL ) then
call compute_forces_inner_core_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
@@ -2604,6 +2658,24 @@
xix_inner_core,xiy_inner_core,xiz_inner_core, &
etax_inner_core,etay_inner_core,etaz_inner_core, &
gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
+!----------------------
+ is_on_a_slice_edge_inner_core,icall, &
+ accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
@@ -2622,6 +2694,24 @@
xix_inner_core,xiy_inner_core,xiz_inner_core, &
etax_inner_core,etay_inner_core,etaz_inner_core, &
gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
+!----------------------
+ is_on_a_slice_edge_inner_core,icall, &
+ accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
@@ -2643,6 +2733,24 @@
xix_inner_core,xiy_inner_core,xiz_inner_core, &
etax_inner_core,etay_inner_core,etaz_inner_core, &
gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
+!----------------------
+ is_on_a_slice_edge_inner_core,icall, &
+ accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
@@ -2661,6 +2769,24 @@
xix_inner_core,xiy_inner_core,xiz_inner_core, &
etax_inner_core,etay_inner_core,etaz_inner_core, &
gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
+!----------------------
+ is_on_a_slice_edge_inner_core,icall, &
+ accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
@@ -2675,6 +2801,31 @@
endif
endif
+ ! Stacey
+ if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
+ call compute_stacey_crust_mantle(ichunk,SIMULATION_TYPE, &
+ NSTEP,it,SAVE_FORWARD,ibool_crust_mantle, &
+ veloc_crust_mantle,accel_crust_mantle,b_accel_crust_mantle, &
+ jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle, &
+ jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle, &
+ wgllwgll_xz,wgllwgll_yz, &
+ normal_xmin_crust_mantle,normal_xmax_crust_mantle, &
+ normal_ymin_crust_mantle,normal_ymax_crust_mantle, &
+ rho_vp_crust_mantle,rho_vs_crust_mantle, &
+ ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle, &
+ ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle, &
+ nimin_crust_mantle,nimax_crust_mantle, &
+ njmin_crust_mantle,njmax_crust_mantle, &
+ nkmin_xi_crust_mantle,nkmin_eta_crust_mantle, &
+ nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
+ nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle, &
+ reclen_xmin_crust_mantle,reclen_xmax_crust_mantle, &
+ reclen_ymin_crust_mantle,reclen_ymax_crust_mantle, &
+ nabs_xmin_cm,nabs_xmax_cm,nabs_ymin_cm,nabs_ymax_cm, &
+ absorb_xmin_crust_mantle5,absorb_xmax_crust_mantle5, &
+ absorb_ymin_crust_mantle5,absorb_ymax_crust_mantle5)
+ endif ! Stacey conditions
+
! add the sources
if (SIMULATION_TYPE == 1) &
call compute_add_sources(myrank,NSOURCES, &
@@ -2776,9 +2927,224 @@
! assemble all the contributions between slices using MPI
+! assemble all the contributions between slices using MPI
+! crust/mantle and inner core handled in the same call
+! in order to reduce the number of MPI messages by 2
+ if(USE_NONBLOCKING_COMMS) then
+
+ iphase = 1 ! initialize the non blocking communication counter
+ iphase_CC = 1 ! initialize the non blocking communication counter for the central cube
+
+! start the non blocking communications
+ call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector, &
+ NUMMSGS_FACES,NCORNERSCHUNKS, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
+ NGLOB1D_RADIAL(IREGION_INNER_CORE),NCHUNKS_VAL,iphase)
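
[Editorial sketch, not part of the patch.] The comment above ("crust/mantle and inner core handled in the same call in order to reduce the number of MPI messages by 2") can be illustrated with a minimal, self-contained toy program. Everything below is illustrative only (the program name, the two-rank layout, the array sizes and names such as buffer_send/buffer_recv are assumptions), not the actual assemble_MPI_vector routine:

  program pack_two_regions
  implicit none
  include 'mpif.h'
  integer, parameter :: NDIM = 3, npoin_cm = 4, npoin_ic = 2
  double precision :: accel_cm(NDIM,npoin_cm), accel_ic(NDIM,npoin_ic)
  double precision :: buffer_send(NDIM,npoin_cm+npoin_ic), buffer_recv(NDIM,npoin_cm+npoin_ic)
  integer :: ier, myrank, other, request_send, request_recv
  integer :: msg_status(MPI_STATUS_SIZE)

  call MPI_INIT(ier)
  call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ier)
  other = 1 - myrank            ! toy layout: run with exactly two MPI ranks

  accel_cm = dble(myrank) + 1.d0
  accel_ic = dble(myrank) + 2.d0

! pack both regions contiguously so that one message carries them,
! i.e. one send/recv pair per neighbour instead of two
  buffer_send(:,1:npoin_cm) = accel_cm
  buffer_send(:,npoin_cm+1:npoin_cm+npoin_ic) = accel_ic

  call MPI_IRECV(buffer_recv,NDIM*(npoin_cm+npoin_ic),MPI_DOUBLE_PRECISION, &
                 other,0,MPI_COMM_WORLD,request_recv,ier)
  call MPI_ISEND(buffer_send,NDIM*(npoin_cm+npoin_ic),MPI_DOUBLE_PRECISION, &
                 other,0,MPI_COMM_WORLD,request_send,ier)
  call MPI_WAIT(request_recv,msg_status,ier)
  call MPI_WAIT(request_send,msg_status,ier)

! unpack and assemble (sum) the contribution received from the neighbouring slice
  accel_cm = accel_cm + buffer_recv(:,1:npoin_cm)
  accel_ic = accel_ic + buffer_recv(:,npoin_cm+1:npoin_cm+npoin_ic)

  if (myrank == 0) print *, 'assembled sample values: ', accel_cm(1,1), accel_ic(1,1)

  call MPI_FINALIZE(ier)
  end program pack_two_regions
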
+
+ icall = 2 ! now compute all the inner elements in the case of non blocking MPI
+
+ ! compute internal forces in the solid regions
+
+ ! for anisotropy and gravity, x y and z contain r theta and phi
+
+ if( USE_DEVILLE_PRODUCTS_VAL ) then
+ call compute_forces_crust_mantle_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ displ_crust_mantle,accel_crust_mantle, &
+ xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+!----------------------
+ is_on_a_slice_edge_crust_mantle,icall, &
+ accel_inner_core,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
+ hprime_xx,hprime_xxT, &
+ hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ kappavstore_crust_mantle,kappahstore_crust_mantle,muvstore_crust_mantle, &
+ muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
+ c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
+ c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
+ c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
+ c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
+ c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ ibool_crust_mantle,idoubling_crust_mantle, &
+ R_memory_crust_mantle,epsilondev_crust_mantle, &
+ eps_trace_over_3_crust_mantle,one_minus_sum_beta_crust_mantle, &
+ alphaval,betaval,gammaval,factor_common_crust_mantle, &
+ size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
+ size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
+ else
+ call compute_forces_crust_mantle(minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ displ_crust_mantle,accel_crust_mantle, &
+ xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+ xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle, &
+ etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+ gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle, &
+!----------------------
+ is_on_a_slice_edge_crust_mantle,icall, &
+ accel_inner_core,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
+ hprime_xx,hprime_yy,hprime_zz, &
+ hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ kappavstore_crust_mantle,kappahstore_crust_mantle,muvstore_crust_mantle, &
+ muhstore_crust_mantle,eta_anisostore_crust_mantle, &
+ c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
+ c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
+ c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
+ c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
+ c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
+ c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+ c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
+ ibool_crust_mantle,idoubling_crust_mantle, &
+ R_memory_crust_mantle,epsilondev_crust_mantle, &
+ eps_trace_over_3_crust_mantle,one_minus_sum_beta_crust_mantle, &
+ alphaval,betaval,gammaval,factor_common_crust_mantle, &
+ size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
+ size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
+ endif
+
+ ! Deville routine
+ if( USE_DEVILLE_PRODUCTS_VAL ) then
+ call compute_forces_inner_core_Dev(minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ displ_inner_core,accel_inner_core, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ xix_inner_core,xiy_inner_core,xiz_inner_core, &
+ etax_inner_core,etay_inner_core,etaz_inner_core, &
+ gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
+!----------------------
+ is_on_a_slice_edge_inner_core,icall, &
+ accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
+ hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
+ c11store_inner_core,c33store_inner_core,c12store_inner_core, &
+ c13store_inner_core,c44store_inner_core, &
+ R_memory_inner_core,epsilondev_inner_core, eps_trace_over_3_inner_core,&
+ one_minus_sum_beta_inner_core, &
+ alphaval,betaval,gammaval, &
+ factor_common_inner_core, &
+ size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
+ size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
+ else
+ call compute_forces_inner_core(minus_gravity_table,density_table,minus_deriv_gravity_table, &
+ displ_inner_core,accel_inner_core, &
+ xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+ xix_inner_core,xiy_inner_core,xiz_inner_core, &
+ etax_inner_core,etay_inner_core,etaz_inner_core, &
+ gammax_inner_core,gammay_inner_core,gammaz_inner_core, &
+!----------------------
+ is_on_a_slice_edge_inner_core,icall, &
+ accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
+ myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector,iphase, &
+ nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+!----------------------
+ hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+ wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+ kappavstore_inner_core,muvstore_inner_core,ibool_inner_core,idoubling_inner_core, &
+ c11store_inner_core,c33store_inner_core,c12store_inner_core, &
+ c13store_inner_core,c44store_inner_core, &
+ R_memory_inner_core,epsilondev_inner_core, eps_trace_over_3_inner_core,&
+ one_minus_sum_beta_inner_core, &
+ alphaval,betaval,gammaval, &
+ factor_common_inner_core, &
+ size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
+ size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
+ endif
+
+! assemble all the contributions between slices using MPI
+! crust/mantle and inner core handled in the same call
+! in order to reduce the number of MPI messages by 2
+ do while (iphase <= 7) ! make sure the last communications are finished and processed
+ call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
+ iproc_xi,iproc_eta,ichunk,addressing, &
+ iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+ npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+ iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+ iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+ npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+ iboolfaces_inner_core,iboolcorner_inner_core, &
+ iprocfrom_faces,iprocto_faces, &
+ iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+ buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+ buffer_send_chunkcorners_vector,buffer_recv_chunkcorners_vector, &
+ NUMMSGS_FACES,NCORNERSCHUNKS, &
+ NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
+ NGLOB1D_RADIAL(IREGION_INNER_CORE),NCHUNKS_VAL,iphase)
+ enddo
+ else
! crust/mantle and inner core handled in the same call
! in order to reduce the number of MPI messages by 2
- call assemble_MPI_vector(myrank, &
+ call assemble_MPI_vector_block(myrank, &
accel_crust_mantle,NGLOB_CRUST_MANTLE, &
accel_inner_core,NGLOB_INNER_CORE, &
iproc_xi,iproc_eta,ichunk,addressing, &
@@ -2801,20 +3167,27 @@
NGLOB1D_RADIAL(IREGION_INNER_CORE), &
NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE), &
NGLOB2DMAX_XY,NCHUNKS_VAL)
+ endif
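
[Editorial sketch, not part of the patch.] The non blocking pattern introduced above (post the assembly, compute the inner elements that do not touch the slice edges, then keep polling until every communication phase has completed) is sketched below as a toy two-rank program. The names req_send, req_recv and inner_work, and the reduced number of phases, are assumptions for illustration; the actual assemble_MPI_vector logic loops while iphase <= 7 as shown in the patch:

  program overlap_comm_compute
  implicit none
  include 'mpif.h'
  integer, parameter :: N = 100000
  double precision :: sendbuf(N), recvbuf(N), inner_work(N)
  integer :: ier, myrank, other, req_send, req_recv, iphase
  integer :: msg_status(MPI_STATUS_SIZE)
  logical :: flag

  call MPI_INIT(ier)
  call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ier)
  other = 1 - myrank                 ! toy layout: run with exactly two MPI ranks

  sendbuf = dble(myrank)

! phase 1: post the non blocking receive and send for the slice-edge points,
! then return to the solver instead of waiting
  call MPI_IRECV(recvbuf,N,MPI_DOUBLE_PRECISION,other,0,MPI_COMM_WORLD,req_recv,ier)
  call MPI_ISEND(sendbuf,N,MPI_DOUBLE_PRECISION,other,0,MPI_COMM_WORLD,req_send,ier)
  iphase = 2

! overlap: compute the "inner" elements (icall = 2 in the patch), which do not
! need the neighbour's data, while the messages are in flight
  inner_work = sin(sendbuf)
  if (myrank == 0) print *, 'inner work done, sample value: ', inner_work(1)

! finishing loop: poll the pending requests and advance the phase counter until
! every communication phase is done (the real code loops while iphase <= 7)
  do while (iphase <= 3)
    if (iphase == 2) then
      call MPI_TEST(req_recv,flag,msg_status,ier)
      if (flag) iphase = 3
    else
      call MPI_TEST(req_send,flag,msg_status,ier)
      if (flag) iphase = 4
    endif
  enddo

! only now update the points on the slice edges, which need the received data
  sendbuf = sendbuf + recvbuf

  call MPI_FINALIZE(ier)
  end program overlap_comm_compute
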
!---
!--- use buffers to assemble forces with the central cube
!---
- if(INCLUDE_CENTRAL_CUBE) then
- call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
- npoin2D_cube_from_slices, buffer_all_cube_from_slices, &
- buffer_slices, buffer_slices2, ibool_central_cube, &
- receiver_cube_from_slices, ibool_inner_core, &
- idoubling_inner_core, NSPEC_INNER_CORE, &
- ibelm_bottom_inner_core, NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
- NGLOB_INNER_CORE,accel_inner_core,NDIM)
- endif ! end of assembling forces with the central cube
+ if(INCLUDE_CENTRAL_CUBE) then
+ if(USE_NONBLOCKING_COMMS) then
+ do while (iphase_CC <= 4) ! make sure the last communications are finished and processed
+ call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+ receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
+ ibelm_bottom_inner_core,NSPEC2D_BOTTOM(IREGION_INNER_CORE),accel_inner_core,NDIM,iphase_CC)
+ enddo
+ else
+ call assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+ npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,buffer_slices2,ibool_central_cube, &
+ receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core,NSPEC_INNER_CORE, &
+ ibelm_bottom_inner_core,NSPEC2D_BOTTOM(IREGION_INNER_CORE),NGLOB_INNER_CORE,accel_inner_core,NDIM)
+ endif
+ endif ! end of assembling forces with the central cube
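
[Editorial sketch, not part of the patch.] The central cube assembly follows the same phase-counter contract: the routine is called repeatedly, returns immediately if the pending message has not arrived yet, and advances iphase_CC when it has, so the caller simply loops until the counter passes the last phase (4 in the patch above). A minimal sketch of that contract with only two illustrative phases; the routine name advance_phase and all variables are hypothetical, and a toy two-rank run is assumed:

  program central_cube_phase_driver
  implicit none
  include 'mpif.h'
  integer :: ier, myrank, other, iphase_CC, request
  double precision :: sendbuf(10), recvbuf(10)

  call MPI_INIT(ier)
  call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ier)
  other = 1 - myrank                 ! toy layout: run with exactly two MPI ranks

  sendbuf = dble(myrank)
  iphase_CC = 1

! the caller keeps calling the routine until it reports that the last phase has
! been processed, exactly like the "do while (iphase_CC <= 4)" loop in the patch
  do while (iphase_CC <= 2)
    call advance_phase(iphase_CC,other,request,sendbuf,recvbuf)
  enddo

  if (myrank == 0) print *, 'assembled sample value: ', sendbuf(1)

  call MPI_FINALIZE(ier)

  contains

  subroutine advance_phase(iphase_CC,other,request,sendbuf,recvbuf)
  implicit none
  integer, intent(inout) :: iphase_CC, request
  integer, intent(in) :: other
  double precision, intent(inout) :: sendbuf(10), recvbuf(10)
  integer :: ier, msg_status(MPI_STATUS_SIZE)
  logical :: flag

  select case (iphase_CC)
  case (1)
! phase 1: post the non blocking exchange and return immediately
    call MPI_IRECV(recvbuf,10,MPI_DOUBLE_PRECISION,other,0,MPI_COMM_WORLD,request,ier)
    call MPI_SEND(sendbuf,10,MPI_DOUBLE_PRECISION,other,0,MPI_COMM_WORLD,ier)
    iphase_CC = 2
  case (2)
! phase 2: test the pending receive; if not ready yet, return without blocking
    call MPI_TEST(request,flag,msg_status,ier)
    if (.not. flag) return
    sendbuf = sendbuf + recvbuf     ! assemble the received contribution
    iphase_CC = 3
  end select
  end subroutine advance_phase

  end program central_cube_phase_driver
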
! way 1:
! do i=1,NGLOB_CRUST_MANTLE
@@ -2865,7 +3238,7 @@
! crust/mantle and inner core handled in the same call
! in order to reduce the number of MPI messages by 2
- call assemble_MPI_vector(myrank, &
+ call assemble_MPI_vector_block(myrank, &
b_accel_crust_mantle,NGLOB_CRUST_MANTLE, &
b_accel_inner_core,NGLOB_INNER_CORE, &
iproc_xi,iproc_eta,ichunk,addressing, &
@@ -2895,7 +3268,8 @@
if(INCLUDE_CENTRAL_CUBE) then
- call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
+!! DK DK 33333333333333333333333 this should be converted to non blocking (and thus should have iphase etc)
+ call assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
npoin2D_cube_from_slices, buffer_all_cube_from_slices, &
buffer_slices, buffer_slices2, ibool_central_cube, &
receiver_cube_from_slices, ibool_inner_core, &