[cig-commits] r19948 - in seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER: EXAMPLES/global_s362ani_small/DATA EXAMPLES/regional_Greece_small setup src/cuda src/meshfem3D src/shared src/specfem3D

danielpeter at geodynamics.org
Mon Apr 16 14:29:37 PDT 2012


Author: danielpeter
Date: 2012-04-16 14:29:35 -0700 (Mon, 16 Apr 2012)
New Revision: 19948

Added:
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_central_cube_mesh.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_scalar_mesh.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_vector_mesh.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_MPI_interfaces.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_central_cube_buffers.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/fix_non_blocking_flags.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/get_MPI_interfaces.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/heap_sort.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/read_arrays_buffers_mesher.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_color_perm.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/test_MPI_interfaces.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/write_VTK_file.f90
Removed:
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/create_central_cube_buffers.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90
Modified:
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_s362ani_small/DATA/Par_file
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/xcombine_vol_data.sh
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/xcombine_vol_data.vs.sh
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/Makefile.in
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/compute_area.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_chunk_buffers.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_doubling_elements.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_meshes.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regions_mesh.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regular_elements.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D_models.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D_par.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_model.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/count_elements.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/define_all_layers.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/get_timestep_and_layers.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/model_topo_bathy.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_compute_parameters.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_value_parameters.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/initialize_simulation.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_solver.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/recompute_jacobian.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_volume.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90
Log:
moves MPI interface detection into the mesher; removes unused routines

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_s362ani_small/DATA/Par_file
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_s362ani_small/DATA/Par_file	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/global_s362ani_small/DATA/Par_file	2012-04-16 21:29:35 UTC (rev 19948)
@@ -16,8 +16,8 @@
 
 # number of elements at the surface along the two sides of the first chunk
 # (must be multiple of 32 and 16 * multiple of NPROC below)
-NEX_XI                          = 96
-NEX_ETA                         = 96 
+NEX_XI                          = 48
+NEX_ETA                         = 48 
 
 # number of MPI processors along the two sides of the first chunk
 NPROC_XI                        = 1
@@ -46,7 +46,7 @@
 ABSORBING_CONDITIONS            = .false.
 
 # record length in minutes
-RECORD_LENGTH_IN_MINUTES        = 16.0d0
+RECORD_LENGTH_IN_MINUTES        = 30.0d0
 
 # save AVS or OpenDX movies
 MOVIE_SURFACE                   = .false.
@@ -86,7 +86,7 @@
 LOCAL_TMP_PATH                  = ./DATABASES_MPI
 
 # interval at which we output time step info and max of norm of displacement
-NTSTEP_BETWEEN_OUTPUT_INFO      = 500
+NTSTEP_BETWEEN_OUTPUT_INFO      = 50
 
 # interval in time steps for temporary writing of seismograms
 NTSTEP_BETWEEN_OUTPUT_SEISMOS   = 5000000

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/xcombine_vol_data.sh
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/xcombine_vol_data.sh	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/xcombine_vol_data.sh	2012-04-16 21:29:35 UTC (rev 19948)
@@ -4,7 +4,10 @@
 # note: script requires executable 'mesh2vtu'
 ##############################################
 # partitions
-numCPUs=4
+numCPUs=$1
+if [ "$1" == "" ]; then echo "usage: ./xcombine_vol_data.sh numCPUs   (e.g. 4)"; exit 1; fi
+
+
 # slice file
 echo "1" | awk '{for(i=0;i<numCPUs;i++)print i}' numCPUs=$numCPUs > slices_all.txt
 slice="slices_all.txt"
@@ -20,9 +23,9 @@
 echo "alpha_kernel"
 echo
 ./bin/xcombine_vol_data $slice alpha_kernel $dir $dir OUTPUT_FILES/ $res > tmp.log
-mesh2vtu.pl -i OUTPUT_FILES/reg_1_alpha_kernel.mesh -o OUTPUT_FILES/reg_1_alpha_kernel.vtu >> tmp.log
-mesh2vtu.pl -i OUTPUT_FILES/reg_2_alpha_kernel.mesh -o OUTPUT_FILES/reg_2_alpha_kernel.vtu >> tmp.log
-mesh2vtu.pl -i OUTPUT_FILES/reg_3_alpha_kernel.mesh -o OUTPUT_FILES/reg_3_alpha_kernel.vtu >> tmp.log
+mesh2vtu  OUTPUT_FILES/reg_1_alpha_kernel.mesh  OUTPUT_FILES/reg_1_alpha_kernel.vtu >> tmp.log
+mesh2vtu  OUTPUT_FILES/reg_2_alpha_kernel.mesh  OUTPUT_FILES/reg_2_alpha_kernel.vtu >> tmp.log
+mesh2vtu  OUTPUT_FILES/reg_3_alpha_kernel.mesh  OUTPUT_FILES/reg_3_alpha_kernel.vtu >> tmp.log
 rm -f OUTPUT_FILES/reg_*alpha*.mesh
 min=`grep "min/max" tmp.log | awk '{print $3 }' | sort | head -n 1`
 max=`grep "min/max" tmp.log | awk '{print $4 }' | sort | tail -n 1`
@@ -35,7 +38,7 @@
 echo
 # only for crust_mantle region
 ./bin/xcombine_vol_data $slice beta_kernel $dir $dir OUTPUT_FILES/ $res 1 > tmp.log
-mesh2vtu.pl -i OUTPUT_FILES/reg_1_beta_kernel.mesh -o OUTPUT_FILES/reg_1_beta_kernel.vtu >> tmp.log
+mesh2vtu  OUTPUT_FILES/reg_1_beta_kernel.mesh  OUTPUT_FILES/reg_1_beta_kernel.vtu >> tmp.log
 rm -f OUTPUT_FILES/reg_*beta*.mesh
 min=`grep "min/max" tmp.log | awk '{print $3 }' | sort | head -n 1`
 max=`grep "min/max" tmp.log | awk '{print $4 }' | sort | tail -n 1`
@@ -47,9 +50,9 @@
 echo "rho_kernel"
 echo
 ./bin/xcombine_vol_data $slice rho_kernel $dir $dir OUTPUT_FILES/ $res > tmp.log
-mesh2vtu.pl -i OUTPUT_FILES/reg_1_rho_kernel.mesh -o OUTPUT_FILES/reg_1_rho_kernel.vtu >> tmp.log
-mesh2vtu.pl -i OUTPUT_FILES/reg_2_rho_kernel.mesh -o OUTPUT_FILES/reg_2_rho_kernel.vtu >> tmp.log
-mesh2vtu.pl -i OUTPUT_FILES/reg_3_rho_kernel.mesh -o OUTPUT_FILES/reg_3_rho_kernel.vtu >> tmp.log
+mesh2vtu  OUTPUT_FILES/reg_1_rho_kernel.mesh  OUTPUT_FILES/reg_1_rho_kernel.vtu >> tmp.log
+mesh2vtu  OUTPUT_FILES/reg_2_rho_kernel.mesh  OUTPUT_FILES/reg_2_rho_kernel.vtu >> tmp.log
+mesh2vtu  OUTPUT_FILES/reg_3_rho_kernel.mesh  OUTPUT_FILES/reg_3_rho_kernel.vtu >> tmp.log
 rm -f OUTPUT_FILES/reg_*rho*.mesh
 min=`grep "min/max" tmp.log | awk '{print $3 }' | sort | head -n 1`
 max=`grep "min/max" tmp.log | awk '{print $4 }' | sort | tail -n 1`

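With this change the combine script no longer hardcodes the partition count; the number of slices is read from the first command-line argument (the same convention is used by xcombine_vol_data.vs.sh below). A minimal sketch of an invocation, assuming the run was decomposed into 4 slices and the per-slice kernel files are already in the database directory referenced by $dir inside the script:

  # combine the per-slice kernel files of a 4-slice run into .vtu files
  # (the argument should match the total number of slices of the simulation,
  #  i.e. NPROC_XI * NPROC_ETA * NCHUNKS)
  ./xcombine_vol_data.sh 4
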
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/xcombine_vol_data.vs.sh
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/xcombine_vol_data.vs.sh	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/EXAMPLES/regional_Greece_small/xcombine_vol_data.vs.sh	2012-04-16 21:29:35 UTC (rev 19948)
@@ -3,8 +3,12 @@
 #
 # note: script requires executable 'mesh2vtu'
 ##############################################
+
 # partitions
-numCPUs=4
+numCPUs=$1
+if [ "$1" == "" ]; then echo "usage: ./xcombine_vol_data.vs.sh numCPUs   (e.g. 4)"; exit 1; fi
+
+
 # slice file
 echo "1" | awk '{for(i=0;i<numCPUs;i++)print i}' numCPUs=$numCPUs > slices_all.txt
 slice="slices_all.txt"
@@ -22,9 +26,9 @@
 echo "velocity model: $par"
 echo
 ./bin/xcombine_vol_data $slice $par $dir $dir OUTPUT_FILES/ $res > tmp.log
-mesh2vtu.pl -i OUTPUT_FILES/reg_1_$par.mesh -o OUTPUT_FILES/reg_1_$par.vtu >> tmp.log
-mesh2vtu.pl -i OUTPUT_FILES/reg_2_$par.mesh -o OUTPUT_FILES/reg_2_$par.vtu >> tmp.log
-mesh2vtu.pl -i OUTPUT_FILES/reg_3_$par.mesh -o OUTPUT_FILES/reg_3_$par.vtu >> tmp.log
+mesh2vtu OUTPUT_FILES/reg_1_$par.mesh  OUTPUT_FILES/reg_1_$par.vtu >> tmp.log
+mesh2vtu OUTPUT_FILES/reg_2_$par.mesh  OUTPUT_FILES/reg_2_$par.vtu >> tmp.log
+mesh2vtu OUTPUT_FILES/reg_3_$par.mesh  OUTPUT_FILES/reg_3_$par.vtu >> tmp.log
 rm -f OUTPUT_FILES/reg_*$par*.mesh
 min=`grep "min/max" tmp.log | awk '{print $3 }' | sort | head -n 1`
 max=`grep "min/max" tmp.log | awk '{print $4 }' | sort | tail -n 1`

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/setup/constants.h.in	2012-04-16 21:29:35 UTC (rev 19948)
@@ -50,19 +50,11 @@
   integer, parameter :: ELEMENTS_NONBLOCKING_CM_IC = 1500
   integer, parameter :: ELEMENTS_NONBLOCKING_OC = 3000
 
-!*********************************************************************************************************
-! added these parameters for the future GPU version of the solver with mesh coloring
-
-! sort outer elements first and then inner elements
-! in order to use non blocking MPI to overlap communications
-  logical, parameter :: SORT_MESH_INNER_OUTER = .true.
-
 ! add mesh coloring for the GPU + MPI implementation
   logical, parameter :: USE_MESH_COLORING_GPU = .false.
   integer, parameter :: MAX_NUMBER_OF_COLORS = 1000
   integer, parameter :: NGNOD_HEXAHEDRA = 8
 
-!*********************************************************************************************************
 
 ! if files on a local path on each node are also seen as global with same path
 ! set to .true. typically on a machine with a common (shared) file system, e.g. LUSTRE, GPFS or NFS-mounted /home

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_coupling_cuda.cu	2012-04-16 21:29:35 UTC (rev 19948)
@@ -44,15 +44,15 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 __global__ void compute_coupling_fluid_CMB_kernel(realw* displ_crust_mantle,
-              realw* accel_outer_core,
-              int* ibool_crust_mantle,
-              int* ibelm_bottom_crust_mantle,
-              realw* normal_top_outer_core,
-              realw* jacobian2D_top_outer_core,
-              realw* wgllwgll_xy,
-              int* ibool_outer_core,
-              int* ibelm_top_outer_core,
-              int NSPEC2D_TOP_OC) {
+                                                  realw* accel_outer_core,
+                                                  int* ibool_crust_mantle,
+                                                  int* ibelm_bottom_crust_mantle,
+                                                  realw* normal_top_outer_core,
+                                                  realw* jacobian2D_top_outer_core,
+                                                  realw* wgllwgll_xy,
+                                                  int* ibool_outer_core,
+                                                  int* ibelm_top_outer_core,
+                                                  int NSPEC2D_TOP_OC) {
 
   int i = threadIdx.x;
   int j = threadIdx.y;
@@ -108,15 +108,15 @@
 }
 
 __global__ void compute_coupling_fluid_ICB_kernel(realw* displ_inner_core,
-              realw* accel_outer_core,
-              int* ibool_inner_core,
-              int* ibelm_top_inner_core,
-              realw* normal_bottom_outer_core,
-              realw* jacobian2D_bottom_outer_core,
-              realw* wgllwgll_xy,
-              int* ibool_outer_core,
-              int* ibelm_bottom_outer_core,
-              int NSPEC2D_BOTTOM_OC) {
+                                                  realw* accel_outer_core,
+                                                  int* ibool_inner_core,
+                                                  int* ibelm_top_inner_core,
+                                                  realw* normal_bottom_outer_core,
+                                                  realw* jacobian2D_bottom_outer_core,
+                                                  realw* wgllwgll_xy,
+                                                  int* ibool_outer_core,
+                                                  int* ibelm_bottom_outer_core,
+                                                  int NSPEC2D_BOTTOM_OC) {
 
   int i = threadIdx.x;
   int j = threadIdx.y;
@@ -194,28 +194,28 @@
 
   // launches GPU kernel
   compute_coupling_fluid_CMB_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
-                  mp->d_accel_outer_core,
-                  mp->d_ibool_crust_mantle,
-                  mp->d_ibelm_bottom_crust_mantle,
-                  mp->d_normal_top_outer_core,
-                  mp->d_jacobian2D_top_outer_core,
-                  mp->d_wgllwgll_xy,
-                  mp->d_ibool_outer_core,
-                  mp->d_ibelm_top_outer_core,
-                  mp->nspec2D_top_outer_core);
+                                                      mp->d_accel_outer_core,
+                                                      mp->d_ibool_crust_mantle,
+                                                      mp->d_ibelm_bottom_crust_mantle,
+                                                      mp->d_normal_top_outer_core,
+                                                      mp->d_jacobian2D_top_outer_core,
+                                                      mp->d_wgllwgll_xy,
+                                                      mp->d_ibool_outer_core,
+                                                      mp->d_ibelm_top_outer_core,
+                                                      mp->nspec2D_top_outer_core);
 
   // adjoint simulations
   if ( mp->simulation_type == 3 ){
     compute_coupling_fluid_CMB_kernel<<<grid,threads>>>(mp->d_b_displ_crust_mantle,
-              mp->d_b_accel_outer_core,
-              mp->d_ibool_crust_mantle,
-              mp->d_ibelm_bottom_crust_mantle,
-              mp->d_normal_top_outer_core,
-              mp->d_jacobian2D_top_outer_core,
-              mp->d_wgllwgll_xy,
-              mp->d_ibool_outer_core,
-              mp->d_ibelm_top_outer_core,
-              mp->nspec2D_top_outer_core);
+                                                        mp->d_b_accel_outer_core,
+                                                        mp->d_ibool_crust_mantle,
+                                                        mp->d_ibelm_bottom_crust_mantle,
+                                                        mp->d_normal_top_outer_core,
+                                                        mp->d_jacobian2D_top_outer_core,
+                                                        mp->d_wgllwgll_xy,
+                                                        mp->d_ibool_outer_core,
+                                                        mp->d_ibelm_top_outer_core,
+                                                        mp->nspec2D_top_outer_core);
   }
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -248,28 +248,28 @@
 
   // launches GPU kernel
   compute_coupling_fluid_ICB_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
-                  mp->d_accel_outer_core,
-                  mp->d_ibool_inner_core,
-                  mp->d_ibelm_top_inner_core,
-                  mp->d_normal_bottom_outer_core,
-                  mp->d_jacobian2D_bottom_outer_core,
-                  mp->d_wgllwgll_xy,
-                  mp->d_ibool_outer_core,
-                  mp->d_ibelm_bottom_outer_core,
-                  mp->nspec2D_bottom_outer_core);
+                                                      mp->d_accel_outer_core,
+                                                      mp->d_ibool_inner_core,
+                                                      mp->d_ibelm_top_inner_core,
+                                                      mp->d_normal_bottom_outer_core,
+                                                      mp->d_jacobian2D_bottom_outer_core,
+                                                      mp->d_wgllwgll_xy,
+                                                      mp->d_ibool_outer_core,
+                                                      mp->d_ibelm_bottom_outer_core,
+                                                      mp->nspec2D_bottom_outer_core);
 
   // adjoint simulations
   if ( mp->simulation_type == 3 ){
     compute_coupling_fluid_ICB_kernel<<<grid,threads>>>(mp->d_b_displ_inner_core,
-              mp->d_b_accel_outer_core,
-              mp->d_ibool_inner_core,
-              mp->d_ibelm_top_inner_core,
-              mp->d_normal_bottom_outer_core,
-              mp->d_jacobian2D_bottom_outer_core,
-              mp->d_wgllwgll_xy,
-              mp->d_ibool_outer_core,
-              mp->d_ibelm_bottom_outer_core,
-              mp->nspec2D_bottom_outer_core);
+                                                        mp->d_b_accel_outer_core,
+                                                        mp->d_ibool_inner_core,
+                                                        mp->d_ibelm_top_inner_core,
+                                                        mp->d_normal_bottom_outer_core,
+                                                        mp->d_jacobian2D_bottom_outer_core,
+                                                        mp->d_wgllwgll_xy,
+                                                        mp->d_ibool_outer_core,
+                                                        mp->d_ibelm_bottom_outer_core,
+                                                        mp->nspec2D_bottom_outer_core);
   }
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   //double end_time = get_time();
@@ -285,19 +285,19 @@
 /* ----------------------------------------------------------------------------------------------- */
 
 __global__ void compute_coupling_CMB_fluid_kernel(realw* displ_crust_mantle,
-              realw* accel_crust_mantle,
-              realw* accel_outer_core,
-              int* ibool_crust_mantle,
-              int* ibelm_bottom_crust_mantle,
-              realw* normal_top_outer_core,
-              realw* jacobian2D_top_outer_core,
-              realw* wgllwgll_xy,
-              int* ibool_outer_core,
-              int* ibelm_top_outer_core,
-              double RHO_TOP_OC,
-              realw minus_g_cmb,
-              int GRAVITY_VAL,
-              int NSPEC_BOTTOM_CM) {
+                                                  realw* accel_crust_mantle,
+                                                  realw* accel_outer_core,
+                                                  int* ibool_crust_mantle,
+                                                  int* ibelm_bottom_crust_mantle,
+                                                  realw* normal_top_outer_core,
+                                                  realw* jacobian2D_top_outer_core,
+                                                  realw* wgllwgll_xy,
+                                                  int* ibool_outer_core,
+                                                  int* ibelm_top_outer_core,
+                                                  double RHO_TOP_OC,
+                                                  realw minus_g_cmb,
+                                                  int GRAVITY_VAL,
+                                                  int NSPEC_BOTTOM_CM) {
 
   int i = threadIdx.x;
   int j = threadIdx.y;
@@ -354,19 +354,19 @@
 }
 
 __global__ void compute_coupling_ICB_fluid_kernel(realw* displ_inner_core,
-              realw* accel_inner_core,
-              realw* accel_outer_core,
-              int* ibool_inner_core,
-              int* ibelm_top_inner_core,
-              realw* normal_bottom_outer_core,
-              realw* jacobian2D_bottom_outer_core,
-              realw* wgllwgll_xy,
-              int* ibool_outer_core,
-              int* ibelm_bottom_outer_core,
-              double RHO_BOTTOM_OC,
-              realw minus_g_icb,
-              int GRAVITY_VAL,
-              int NSPEC_TOP_IC) {
+                                                  realw* accel_inner_core,
+                                                  realw* accel_outer_core,
+                                                  int* ibool_inner_core,
+                                                  int* ibelm_top_inner_core,
+                                                  realw* normal_bottom_outer_core,
+                                                  realw* jacobian2D_bottom_outer_core,
+                                                  realw* wgllwgll_xy,
+                                                  int* ibool_outer_core,
+                                                  int* ibelm_bottom_outer_core,
+                                                  double RHO_BOTTOM_OC,
+                                                  realw minus_g_icb,
+                                                  int GRAVITY_VAL,
+                                                  int NSPEC_TOP_IC) {
 
   int i = threadIdx.x;
   int j = threadIdx.y;
@@ -427,9 +427,9 @@
 extern "C"
 void FC_FUNC_(compute_coupling_cmb_fluid_cuda,
               COMPUTE_COUPLING_CMB_FLUID_CUDA)(long* Mesh_pointer_f,
-                 double RHO_TOP_OC,
-                 realw minus_g_cmb,
-                 int GRAVITY_VAL) {
+                                               double RHO_TOP_OC,
+                                               realw minus_g_cmb,
+                                               int GRAVITY_VAL) {
 
   TRACE("compute_coupling_cmb_fluid_cuda");
   //double start_time = get_time();
@@ -448,36 +448,36 @@
 
   // launches GPU kernel
   compute_coupling_CMB_fluid_kernel<<<grid,threads>>>(mp->d_displ_crust_mantle,
-                  mp->d_accel_crust_mantle,
-                  mp->d_accel_outer_core,
-                  mp->d_ibool_crust_mantle,
-                  mp->d_ibelm_bottom_crust_mantle,
-                  mp->d_normal_top_outer_core,
-                  mp->d_jacobian2D_top_outer_core,
-                  mp->d_wgllwgll_xy,
-                  mp->d_ibool_outer_core,
-                  mp->d_ibelm_top_outer_core,
-                  RHO_TOP_OC,
-                  minus_g_cmb,
-                  GRAVITY_VAL,
-                  mp->nspec2D_bottom_crust_mantle);
+                                                      mp->d_accel_crust_mantle,
+                                                      mp->d_accel_outer_core,
+                                                      mp->d_ibool_crust_mantle,
+                                                      mp->d_ibelm_bottom_crust_mantle,
+                                                      mp->d_normal_top_outer_core,
+                                                      mp->d_jacobian2D_top_outer_core,
+                                                      mp->d_wgllwgll_xy,
+                                                      mp->d_ibool_outer_core,
+                                                      mp->d_ibelm_top_outer_core,
+                                                      RHO_TOP_OC,
+                                                      minus_g_cmb,
+                                                      GRAVITY_VAL,
+                                                      mp->nspec2D_bottom_crust_mantle);
 
   //  adjoint simulations
   if ( mp->simulation_type == 3 ){
     compute_coupling_CMB_fluid_kernel<<<grid,threads>>>(mp->d_b_displ_crust_mantle,
-              mp->d_b_accel_crust_mantle,
-              mp->d_b_accel_outer_core,
-              mp->d_ibool_crust_mantle,
-              mp->d_ibelm_bottom_crust_mantle,
-              mp->d_normal_top_outer_core,
-              mp->d_jacobian2D_top_outer_core,
-              mp->d_wgllwgll_xy,
-              mp->d_ibool_outer_core,
-              mp->d_ibelm_top_outer_core,
-              RHO_TOP_OC,
-              minus_g_cmb,
-              GRAVITY_VAL,
-              mp->nspec2D_bottom_crust_mantle);
+                                                        mp->d_b_accel_crust_mantle,
+                                                        mp->d_b_accel_outer_core,
+                                                        mp->d_ibool_crust_mantle,
+                                                        mp->d_ibelm_bottom_crust_mantle,
+                                                        mp->d_normal_top_outer_core,
+                                                        mp->d_jacobian2D_top_outer_core,
+                                                        mp->d_wgllwgll_xy,
+                                                        mp->d_ibool_outer_core,
+                                                        mp->d_ibelm_top_outer_core,
+                                                        RHO_TOP_OC,
+                                                        minus_g_cmb,
+                                                        GRAVITY_VAL,
+                                                        mp->nspec2D_bottom_crust_mantle);
   }
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -492,9 +492,9 @@
 extern "C"
 void FC_FUNC_(compute_coupling_icb_fluid_cuda,
               COMPUTE_COUPLING_ICB_FLUID_CUDA)(long* Mesh_pointer_f,
-                 double RHO_BOTTOM_OC,
-                 realw minus_g_icb,
-                 int GRAVITY_VAL) {
+                                               double RHO_BOTTOM_OC,
+                                               realw minus_g_icb,
+                                               int GRAVITY_VAL) {
 
   TRACE("compute_coupling_icb_fluid_cuda");
   //double start_time = get_time();
@@ -513,36 +513,36 @@
 
   // launches GPU kernel
   compute_coupling_ICB_fluid_kernel<<<grid,threads>>>(mp->d_displ_inner_core,
-                  mp->d_accel_inner_core,
-                  mp->d_accel_outer_core,
-                  mp->d_ibool_inner_core,
-                  mp->d_ibelm_top_inner_core,
-                  mp->d_normal_bottom_outer_core,
-                  mp->d_jacobian2D_bottom_outer_core,
-                  mp->d_wgllwgll_xy,
-                  mp->d_ibool_outer_core,
-                  mp->d_ibelm_bottom_outer_core,
-                  RHO_BOTTOM_OC,
-                  minus_g_icb,
-                  GRAVITY_VAL,
-                  mp->nspec2D_top_inner_core);
+                                                      mp->d_accel_inner_core,
+                                                      mp->d_accel_outer_core,
+                                                      mp->d_ibool_inner_core,
+                                                      mp->d_ibelm_top_inner_core,
+                                                      mp->d_normal_bottom_outer_core,
+                                                      mp->d_jacobian2D_bottom_outer_core,
+                                                      mp->d_wgllwgll_xy,
+                                                      mp->d_ibool_outer_core,
+                                                      mp->d_ibelm_bottom_outer_core,
+                                                      RHO_BOTTOM_OC,
+                                                      minus_g_icb,
+                                                      GRAVITY_VAL,
+                                                      mp->nspec2D_top_inner_core);
 
   //  adjoint simulations
   if ( mp->simulation_type == 3 ){
     compute_coupling_ICB_fluid_kernel<<<grid,threads>>>(mp->d_b_displ_inner_core,
-              mp->d_b_accel_inner_core,
-              mp->d_b_accel_outer_core,
-              mp->d_ibool_inner_core,
-              mp->d_ibelm_top_inner_core,
-              mp->d_normal_bottom_outer_core,
-              mp->d_jacobian2D_bottom_outer_core,
-              mp->d_wgllwgll_xy,
-              mp->d_ibool_outer_core,
-              mp->d_ibelm_bottom_outer_core,
-              RHO_BOTTOM_OC,
-              minus_g_icb,
-              GRAVITY_VAL,
-              mp->nspec2D_top_inner_core);
+                                                        mp->d_b_accel_inner_core,
+                                                        mp->d_b_accel_outer_core,
+                                                        mp->d_ibool_inner_core,
+                                                        mp->d_ibelm_top_inner_core,
+                                                        mp->d_normal_bottom_outer_core,
+                                                        mp->d_jacobian2D_bottom_outer_core,
+                                                        mp->d_wgllwgll_xy,
+                                                        mp->d_ibool_outer_core,
+                                                        mp->d_ibelm_bottom_outer_core,
+                                                        RHO_BOTTOM_OC,
+                                                        minus_g_icb,
+                                                        GRAVITY_VAL,
+                                                        mp->nspec2D_top_inner_core);
   }
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
@@ -560,13 +560,13 @@
 
 
 __global__ void compute_coupling_ocean_cuda_kernel(realw* accel_crust_mantle,
-               realw* rmass_crust_mantle,
-               realw* rmass_ocean_load,
-               realw* normal_top_crust_mantle,
-               int* ibool_crust_mantle,
-               int* ibelm_top_crust_mantle,
-               int* updated_dof_ocean_load,
-               int NSPEC2D_TOP_CM) {
+                                                   realw* rmass_crust_mantle,
+                                                   realw* rmass_ocean_load,
+                                                   realw* normal_top_crust_mantle,
+                                                   int* ibool_crust_mantle,
+                                                   int* ibelm_top_crust_mantle,
+                                                   int* updated_dof_ocean_load,
+                                                   int NSPEC2D_TOP_CM) {
 
   int i = threadIdx.x;
   int j = threadIdx.y;
@@ -606,7 +606,9 @@
       // make updated component of right-hand side
       // we divide by rmass() which is 1 / M
       // we use the total force which includes the Coriolis term above
-      force_normal_comp = ( accel_crust_mantle[iglob*3]*nx + accel_crust_mantle[iglob*3+1]*ny + accel_crust_mantle[iglob*3+2]*nz ) / rmass_crust_mantle[iglob];
+      force_normal_comp = (  accel_crust_mantle[iglob*3]*nx 
+                           + accel_crust_mantle[iglob*3+1]*ny 
+                           + accel_crust_mantle[iglob*3+2]*nz ) / rmass_crust_mantle[iglob];
 
       additional_term = (rmass_ocean_load[iglob] - rmass_crust_mantle[iglob]) * force_normal_comp;
 
@@ -639,7 +641,8 @@
   dim3 threads(5,5,1);
 
   // initializes temporary array to zero
-  print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,sizeof(int)*mp->NGLOB_CRUST_MANTLE_OCEANS),88501);
+  print_CUDA_error_if_any(cudaMemset(mp->d_updated_dof_ocean_load,0,
+                                     sizeof(int)*mp->NGLOB_CRUST_MANTLE_OCEANS),88501);
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING
   exit_on_cuda_error("before kernel compute_coupling_ocean_cuda");
@@ -648,11 +651,11 @@
   compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_accel_crust_mantle,
                                                        mp->d_rmass_crust_mantle,
                                                        mp->d_rmass_ocean_load,
-                   mp->d_normal_top_crust_mantle,
-                   mp->d_ibool_crust_mantle,
-                   mp->d_ibelm_top_crust_mantle,
-                   mp->d_updated_dof_ocean_load,
-                   mp->nspec2D_top_crust_mantle);
+                                                       mp->d_normal_top_crust_mantle,
+                                                       mp->d_ibool_crust_mantle,
+                                                       mp->d_ibelm_top_crust_mantle,
+                                                       mp->d_updated_dof_ocean_load,
+                                                       mp->nspec2D_top_crust_mantle);
 
   // for backward/reconstructed potentials
   if( mp->simulation_type == 3 ) {
@@ -661,13 +664,13 @@
                                        sizeof(int)*mp->NGLOB_CRUST_MANTLE_OCEANS),88502);
 
     compute_coupling_ocean_cuda_kernel<<<grid,threads>>>(mp->d_b_accel_crust_mantle,
-               mp->d_rmass_crust_mantle,
-               mp->d_rmass_ocean_load,
-               mp->d_normal_top_crust_mantle,
-               mp->d_ibool_crust_mantle,
-               mp->d_ibelm_top_crust_mantle,
-               mp->d_updated_dof_ocean_load,
-               mp->nspec2D_top_crust_mantle);
+                                                         mp->d_rmass_crust_mantle,
+                                                         mp->d_rmass_ocean_load,
+                                                         mp->d_normal_top_crust_mantle,
+                                                         mp->d_ibool_crust_mantle,
+                                                         mp->d_ibelm_top_crust_mantle,
+                                                         mp->d_updated_dof_ocean_load,
+                                                         mp->nspec2D_top_crust_mantle);
   }
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/compute_forces_crust_mantle_cuda.cu	2012-04-16 21:29:35 UTC (rev 19948)
@@ -1839,7 +1839,7 @@
       //}
 
       Kernel_2_crust_mantle(nb_blocks_to_compute,mp,
-			    *deltat,
+                            *deltat,
                             *iphase,
                             mp->d_ibool_crust_mantle + color_offset_nonpadded,
                             mp->d_ispec_is_tiso_crust_mantle + color_offset_ispec,
@@ -1919,7 +1919,7 @@
     // no mesh coloring: uses atomic updates
 
     Kernel_2_crust_mantle(num_elements,mp,
-			  *deltat,
+                          *deltat,
                           *iphase,
                           mp->d_ibool_crust_mantle,
                           mp->d_ispec_is_tiso_crust_mantle,

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/it_update_displacement_cuda.cu	2012-04-16 21:29:35 UTC (rev 19948)
@@ -445,15 +445,15 @@
 
     // updates only veloc at this point
     kernel_3_veloc_cuda_device<<< grid1, threads1>>>(mp->d_veloc_crust_mantle,
-                                                   mp->d_accel_crust_mantle,
-                                                   mp->NGLOB_CRUST_MANTLE,
-                                                   deltatover2);
+                                                     mp->d_accel_crust_mantle,
+                                                     mp->NGLOB_CRUST_MANTLE,
+                                                     deltatover2);
 
     if(SIMULATION_TYPE == 3) {
       kernel_3_veloc_cuda_device<<< grid1, threads1>>>(mp->d_b_veloc_crust_mantle,
-                                                     mp->d_b_accel_crust_mantle,
-                                                     mp->NGLOB_CRUST_MANTLE,
-                                                     b_deltatover2);
+                                                       mp->d_b_accel_crust_mantle,
+                                                       mp->NGLOB_CRUST_MANTLE,
+                                                       b_deltatover2);
     }
   }
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/mesh_constants_cuda.h	2012-04-16 21:29:35 UTC (rev 19948)
@@ -482,8 +482,8 @@
   realw* d_density_table;
 
   //daniel: TODO old...
-  realw* d_minus_g;
-  realw* d_minus_deriv_gravity;
+  //realw* d_minus_g;
+  //realw* d_minus_deriv_gravity;
 
   // ------------------------------------------------------------------ //
   // rotation

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/cuda/prepare_mesh_constants_cuda.cu	2012-04-16 21:29:35 UTC (rev 19948)
@@ -2041,13 +2041,13 @@
 
   // mass matrix
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_rmass_ocean_load),
-             sizeof(realw)*mp->NGLOB_CRUST_MANTLE_OCEANS),4501);
+                                     sizeof(realw)*mp->NGLOB_CRUST_MANTLE_OCEANS),4501);
   print_CUDA_error_if_any(cudaMemcpy(mp->d_rmass_ocean_load,h_rmass_ocean_load,
-             sizeof(realw)*mp->NGLOB_CRUST_MANTLE_OCEANS,cudaMemcpyHostToDevice),4502);
+                                     sizeof(realw)*mp->NGLOB_CRUST_MANTLE_OCEANS,cudaMemcpyHostToDevice),4502);
 
   // temporary global array: used to synchronize updates on global accel array
   print_CUDA_error_if_any(cudaMalloc((void**)&(mp->d_updated_dof_ocean_load),
-             sizeof(int)*mp->NGLOB_CRUST_MANTLE_OCEANS),4502);
+                                     sizeof(int)*mp->NGLOB_CRUST_MANTLE_OCEANS),4502);
 
 
 #ifdef ENABLE_VERY_SLOW_ERROR_CHECKING

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/Makefile.in	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/Makefile.in	2012-04-16 21:29:35 UTC (rev 19948)
@@ -100,6 +100,7 @@
 	$O/rthetaphi_xyz.shared.o \
 	$O/save_header_file.shared.o \
 	$O/spline_routines.shared.o \
+	$O/write_VTK_file.sharedmpi.o \
 	$(EMPTY_MACRO)
 
 
@@ -109,17 +110,22 @@
 	$O/add_topography_410_650.o \
 	$O/add_topography_cmb.o \
 	$O/add_topography_icb.o \
+	$O/assemble_MPI_central_cube_mesh.mpi.o \
+	$O/assemble_MPI_scalar_mesh.mpi.o \
+	$O/assemble_MPI_vector_mesh.o \
 	$O/calc_jacobian.o \
 	$O/compute_coordinates_grid.o \
 	$O/compute_element_properties.o \
 	$O/compute_volumes.o \
 	$O/create_central_cube.o \
+	$O/create_central_cube_buffers.mpi.o \
 	$O/create_chunk_buffers.mpi.o \
 	$O/create_doubling_elements.o \
 	$O/create_mass_matrices.o \
-	$O/create_regions_mesh.mpi.o \
+	$O/create_regions_mesh.o \
 	$O/create_regular_elements.o \
 	$O/define_superbrick.o \
+	$O/fix_non_blocking_flags.o \
 	$O/get_absorb.o \
 	$O/get_ellipticity.o \
 	$O/get_global.o \
@@ -129,9 +135,11 @@
 	$O/get_MPI_1D_buffers.o \
 	$O/get_MPI_cutplanes_eta.o \
 	$O/get_MPI_cutplanes_xi.o \
+	$O/get_MPI_interfaces.mpi.o \
 	$O/get_perm_color.o \
 	$O/get_shape2D.o \
 	$O/get_shape3D.o \
+	$O/heap_sort.o \
 	$O/initialize_layers.o \
 	$O/lgndr.o \
 	$O/model_1dref.o \
@@ -158,9 +166,12 @@
 	$O/model_sea1d.o \
 	$O/model_sea99_s.mpi.o \
 	$O/moho_stretching.o \
+	$O/read_arrays_buffers_mesher.mpi.o \
 	$O/save_arrays_solver.o \
+	$O/setup_color_perm.mpi.o \
 	$O/sort_array_coordinates.o \
 	$O/stretching_function.o \
+	$O/test_MPI_interfaces.mpi.o \
 	$O/write_AVS_DX_global_chunks_data.o \
 	$O/write_AVS_DX_global_data.o \
 	$O/write_AVS_DX_global_faces_data.o \
@@ -173,6 +184,7 @@
 	$O/compute_area.mpi.o \
 	$O/create_addressing.o \
 	$O/create_meshes.mpi.o \
+	$O/create_MPI_interfaces.mpi.o \
 	$O/finalize_mesher.mpi.o \
 	$O/initialize_mesher.mpi.o \
 	$O/meshfem3D.mpi.o \

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_central_cube_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_central_cube_mesh.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_central_cube_mesh.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,324 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
+                                          npoin2D_cube_from_slices, &
+                                          buffer_all_cube_from_slices, buffer_slices, buffer_slices2, &
+                                          ibool_central_cube, &
+                                          receiver_cube_from_slices, ibool_inner_core, &
+                                          idoubling_inner_core, NSPEC_INNER_CORE, &
+                                          ibelm_bottom_inner_core, NSPEC2D_BOTTOM_INNER_CORE,NGLOB_INNER_CORE, &
+                                          vector_assemble,ndim_assemble, &
+                                          iproc_eta,addressing,NCHUNKS,NPROC_XI,NPROC_ETA)
+
+  ! this version of the routine is based on blocking MPI calls
+
+  implicit none
+
+  ! standard include of the MPI library
+  include 'mpif.h'
+  include 'constants.h'
+
+  ! for matching with central cube in inner core
+  integer ichunk, nb_msgs_theor_in_cube, npoin2D_cube_from_slices
+  integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
+  double precision, dimension(npoin2D_cube_from_slices,NDIM) :: &
+    buffer_slices,buffer_slices2
+  double precision, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM) :: &
+    buffer_all_cube_from_slices
+  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
+  integer receiver_cube_from_slices
+
+  ! local to global mapping
+  integer NSPEC_INNER_CORE,NSPEC2D_BOTTOM_INNER_CORE, NGLOB_INNER_CORE
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
+  integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
+  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+
+  ! vector
+  integer ndim_assemble
+  real(kind=CUSTOM_REAL), dimension(ndim_assemble,NGLOB_INNER_CORE) :: vector_assemble
+
+  !for addressing of the slices
+  integer, intent(in) :: NCHUNKS,NPROC_XI,NPROC_ETA
+  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1), intent(in) :: addressing
+  integer, intent(in) :: iproc_eta
+
+  integer ipoin,idimension, ispec2D, ispec
+  integer i,j,k
+  integer sender,receiver,imsg
+
+  real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: array_central_cube
+
+  ! MPI status of messages to be received
+  integer msg_status(MPI_STATUS_SIZE), ier
+
+  ! mask
+  logical, dimension(NGLOB_INNER_CORE) :: mask
+
+  !---
+  !---  now use buffers to assemble mass matrix with central cube once and for all
+  !---
+
+  ! on chunks AB and AB_ANTIPODE, receive all the messages from slices
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+    do imsg = 1,nb_msgs_theor_in_cube-1
+
+  ! receive buffers from slices
+    sender = sender_from_slices_to_cube(imsg)
+    call MPI_RECV(buffer_slices, &
+                ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+                itag,MPI_COMM_WORLD,msg_status,ier)
+
+  ! copy buffer in 2D array for each slice
+    buffer_all_cube_from_slices(imsg,:,1:ndim_assemble) = buffer_slices(:,1:ndim_assemble)
+
+    enddo
+  endif
+
+  ! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+
+  ! for bottom elements in contact with central cube from the slices side
+    ipoin = 0
+    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+
+      ispec = ibelm_bottom_inner_core(ispec2D)
+
+  ! only for DOFs exactly on surface of central cube (bottom of these elements)
+      k = 1
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ipoin = ipoin + 1
+          buffer_slices(ipoin,1:ndim_assemble) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
+        enddo
+      enddo
+    enddo
+
+  ! send buffer to central cube
+    receiver = receiver_cube_from_slices
+    call MPI_SEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
+                 MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
+
+    ! daniel: in case NPROC_XI == 1, the other chunks exchange all bottom points with
+    ! CHUNK_AB **and** CHUNK_AB_ANTIPODE
+    if(NPROC_XI==1) then
+      call MPI_SEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
+                   MPI_DOUBLE_PRECISION, &
+                   addressing(CHUNK_AB_ANTIPODE,0,iproc_eta), &
+                   itag,MPI_COMM_WORLD,ier)
+    endif
+
+
+  endif  ! end sending info to central cube
+
+
+  ! exchange of their bottom faces between chunks AB and AB_ANTIPODE
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+    ipoin = 0
+    do ispec = NSPEC_INNER_CORE, 1, -1
+      if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
+        k = 1
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ipoin = ipoin + 1
+            buffer_slices(ipoin,1:ndim_assemble) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
+          enddo
+        enddo
+      endif
+    enddo
+
+    sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
+
+    call MPI_SENDRECV(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
+        itag,buffer_slices2,ndim_assemble*npoin2D_cube_from_slices,&
+        MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+
+   buffer_all_cube_from_slices(nb_msgs_theor_in_cube,:,1:ndim_assemble) = buffer_slices2(:,1:ndim_assemble)
+
+  endif
+
+  !--- now we need to assemble the contributions
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+    do idimension = 1,ndim_assemble
+  ! erase contributions to central cube array
+      array_central_cube(:) = 0._CUSTOM_REAL
+
+  ! use indirect addressing to store contributions only once
+  ! distinguish between single and double precision for reals
+      do imsg = 1,nb_msgs_theor_in_cube-1
+        do ipoin = 1,npoin2D_cube_from_slices
+          if(NPROC_XI==1) then
+            if(ibool_central_cube(imsg,ipoin) > 0 ) then
+              array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(imsg,ipoin,idimension)
+            endif
+          else
+            if(CUSTOM_REAL == SIZE_REAL) then
+              array_central_cube(ibool_central_cube(imsg,ipoin)) = sngl(buffer_all_cube_from_slices(imsg,ipoin,idimension))
+            else
+              array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(imsg,ipoin,idimension)
+            endif
+          endif
+        enddo
+      enddo
+  ! add the contribution of AB or AB_ANTIPODE to sum with the external slices on the edges
+  ! use a mask to avoid taking the same point into account several times.
+      mask(:) = .false.
+      do ipoin = 1,npoin2D_cube_from_slices
+        if(NPROC_XI==1) then
+          if( ibool_central_cube(nb_msgs_theor_in_cube,ipoin) > 0 ) then
+            if (.not. mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin))) then
+              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
+              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
+              buffer_all_cube_from_slices(nb_msgs_theor_in_cube,ipoin,idimension)
+            endif
+            mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = .true.
+          endif
+        else
+          if (.not. mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin))) then
+            if(CUSTOM_REAL == SIZE_REAL) then
+              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
+              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
+              sngl(buffer_all_cube_from_slices(nb_msgs_theor_in_cube,ipoin,idimension))
+            else
+              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
+              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
+              buffer_all_cube_from_slices(nb_msgs_theor_in_cube,ipoin,idimension)
+            endif
+          endif
+          mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = .true.
+        endif
+      enddo
+
+  ! suppress degrees of freedom already assembled at top of cube on edges
+      do ispec = 1,NSPEC_INNER_CORE
+        if(idoubling_inner_core(ispec) == IFLAG_TOP_CENTRAL_CUBE) then
+          k = NGLLZ
+          do j = 1,NGLLY
+            do i = 1,NGLLX
+              array_central_cube(ibool_inner_core(i,j,k,ispec)) = 0._CUSTOM_REAL
+            enddo
+          enddo
+        endif
+      enddo
+
+  ! assemble contributions
+      vector_assemble(idimension,:) = vector_assemble(idimension,:) + array_central_cube(:)
+
+  ! copy sum back
+      do imsg = 1,nb_msgs_theor_in_cube-1
+        do ipoin = 1,npoin2D_cube_from_slices
+          if(NPROC_XI==1) then
+            if( ibool_central_cube(imsg,ipoin) > 0 ) then
+              buffer_all_cube_from_slices(imsg,ipoin,idimension) = &
+                      vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
+            else
+              buffer_all_cube_from_slices(imsg,ipoin,idimension) = 0._CUSTOM_REAL
+            endif
+          else
+            buffer_all_cube_from_slices(imsg,ipoin,idimension) = &
+                    vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
+          endif
+        enddo
+      enddo
+
+    enddo
+
+  endif
+
+  !----------
+
+  ! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+
+  ! slices receive buffers back from the central cube
+    sender = receiver_cube_from_slices
+    call MPI_RECV(buffer_slices, &
+                ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+                itag,MPI_COMM_WORLD,msg_status,ier)
+
+    ! daniel: in case NPROC_XI == 1, the other chunks exchange all bottom points with
+    ! CHUNK_AB **and** CHUNK_AB_ANTIPODE
+    if(NPROC_XI==1) then
+      call MPI_RECV(buffer_slices2, &
+                  ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION, &
+                  addressing(CHUNK_AB_ANTIPODE,0,iproc_eta), &
+                  itag,MPI_COMM_WORLD,msg_status,ier)
+
+      buffer_slices = buffer_slices + buffer_slices2
+    endif
+
+
+  ! for bottom elements in contact with central cube from the slices side
+    ipoin = 0
+    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+
+      ispec = ibelm_bottom_inner_core(ispec2D)
+
+  ! only for DOFs exactly on surface of central cube (bottom of these elements)
+      k = 1
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ipoin = ipoin + 1
+
+  ! distinguish between single and double precision for reals
+          if(CUSTOM_REAL == SIZE_REAL) then
+            vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,1:ndim_assemble))
+          else
+            vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,1:ndim_assemble)
+          endif
+
+        enddo
+      enddo
+    enddo
+
+  endif  ! end receiving info from central cube
+
+  !------- send info back from central cube to slices
+
+  ! on chunk AB & CHUNK_AB_ANTIPODE, send all the messages to slices
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+    do imsg = 1,nb_msgs_theor_in_cube-1
+
+  ! copy buffer into 2D array for each slice
+      buffer_slices(:,1:ndim_assemble) = buffer_all_cube_from_slices(imsg,:,1:ndim_assemble)
+
+  ! send buffers to slices
+      receiver = sender_from_slices_to_cube(imsg)
+      call MPI_SEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
+                MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
+
+    enddo
+  endif
+
+  end subroutine assemble_MPI_central_cube_block
+
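
The loops above add the contribution of each shared global point exactly once by
checking a logical mask before the sum and setting it afterwards. A minimal,
self-contained sketch of that pattern (the sizes and the ibool_edge indirection
below are purely illustrative, not taken from the routine):

  program demo_mask_assembly
    implicit none
    integer, parameter :: npoin = 6, nglob = 4
    ! several buffer entries point to the same global point, as on shared edges
    integer, dimension(npoin) :: ibool_edge = (/ 1, 2, 2, 3, 4, 4 /)
    real, dimension(npoin) :: buffer = (/ 1., 1., 1., 1., 1., 1. /)
    real, dimension(nglob) :: array_val = 0.
    logical, dimension(nglob) :: mask = .false.
    integer :: ipoin

    do ipoin = 1,npoin
      ! add the buffer contribution only the first time this global point is seen
      if (.not. mask(ibool_edge(ipoin))) then
        array_val(ibool_edge(ipoin)) = array_val(ibool_edge(ipoin)) + buffer(ipoin)
      endif
      mask(ibool_edge(ipoin)) = .true.
    enddo

    ! each global point is summed exactly once: prints four values equal to 1.
    print *, array_val
  end program demo_mask_assembly
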

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_scalar_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_scalar_mesh.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_scalar_mesh.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,539 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!----
+!---- assemble the contributions between slices and chunks using MPI
+!----
+
+  subroutine assemble_MPI_scalar_block(myrank,array_val,nglob, &
+            iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi,iboolright_xi,iboolleft_eta,iboolright_eta, &
+            npoin2D_faces,npoin2D_xi,npoin2D_eta, &
+            iboolfaces,iboolcorner, &
+            iprocfrom_faces,iprocto_faces,imsg_type, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+            NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL, &
+            NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NCHUNKS)
+
+! this version of the routine is based on blocking MPI calls
+
+  implicit none
+
+! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+  include "precision.h"
+
+  integer myrank,nglob,NCHUNKS
+
+! array to assemble
+  real(kind=CUSTOM_REAL), dimension(nglob) :: array_val
+
+  integer iproc_xi,iproc_eta,ichunk
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi,npoin2D_eta
+  integer npoin2D_faces(NUMFACES_SHARED)
+
+  integer NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY
+  integer NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL
+  integer NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS
+
+! for addressing of the slices
+  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1) :: addressing
+
+! 2-D addressing and buffers for summation between slices
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX) :: iboolleft_xi,iboolright_xi
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX) :: iboolleft_eta,iboolright_eta
+
+! indirect addressing for each corner of the chunks
+  integer, dimension(NGLOB1D_RADIAL,NUMCORNERS_SHARED) :: iboolcorner
+  integer icount_corners
+
+  integer :: npoin2D_max_all_CM_IC
+  integer, dimension(NGLOB2DMAX_XY,NUMFACES_SHARED) :: iboolfaces
+  real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC) :: buffer_send_faces_scalar,buffer_received_faces_scalar
+
+! buffers for send and receive between corners of the chunks
+  real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL) :: buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar
+
+! ---- arrays to assemble between chunks
+
+! communication pattern for faces between chunks
+  integer, dimension(NUMMSGS_FACES) :: iprocfrom_faces,iprocto_faces,imsg_type
+
+! communication pattern for corners between chunks
+  integer, dimension(NCORNERSCHUNKS) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+! MPI status of messages to be received
+  integer msg_status(MPI_STATUS_SIZE)
+
+  integer ipoin,ipoin2D,ipoin1D
+  integer sender,receiver,ier
+  integer imsg,imsg_loop
+  integer icount_faces,npoin2D_chunks
+
+! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+! check flag to see if we need to assemble (might be turned off when debugging)
+  if (.not. ACTUALLY_ASSEMBLE_MPI_SLICES) return
+
+! here we have to assemble all the contributions between slices using MPI
+
+!----
+!---- assemble the contributions between slices using MPI
+!----
+
+!----
+!---- first assemble along xi using the 2-D topology
+!----
+
+! assemble along xi only if more than one slice
+  if(NPROC_XI > 1) then
+
+    ! slices copy the right face into the buffer
+    do ipoin=1,npoin2D_xi(2)
+      buffer_send_faces_scalar(ipoin) = array_val(iboolright_xi(ipoin))
+    enddo
+
+    ! send messages forward along each row
+    if(iproc_xi == 0) then
+      sender = MPI_PROC_NULL
+    else
+      sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
+    endif
+    if(iproc_xi == NPROC_XI-1) then
+      receiver = MPI_PROC_NULL
+    else
+      receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
+    endif
+    call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,receiver, &
+          itag2,buffer_received_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,sender, &
+          itag,MPI_COMM_WORLD,msg_status,ier)
+
+    ! all slices add the buffer received to the contributions on the left face
+    if(iproc_xi > 0) then
+      do ipoin=1,npoin2D_xi(1)
+        array_val(iboolleft_xi(ipoin)) = array_val(iboolleft_xi(ipoin)) + &
+                                buffer_received_faces_scalar(ipoin)
+      enddo
+    endif
+
+    ! the contributions are correctly assembled on the left side of each slice
+    ! now we have to send the result back to the sender
+    ! all slices copy the left face into the buffer
+    do ipoin=1,npoin2D_xi(1)
+      buffer_send_faces_scalar(ipoin) = array_val(iboolleft_xi(ipoin))
+    enddo
+
+    ! send messages backward along each row
+    if(iproc_xi == NPROC_XI-1) then
+      sender = MPI_PROC_NULL
+    else
+      sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
+    endif
+    if(iproc_xi == 0) then
+      receiver = MPI_PROC_NULL
+    else
+      receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
+    endif
+    call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,receiver, &
+          itag2,buffer_received_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,sender, &
+          itag,MPI_COMM_WORLD,msg_status,ier)
+
+    ! all slices copy the buffer received to the contributions on the right face
+    if(iproc_xi < NPROC_XI-1) then
+      do ipoin=1,npoin2D_xi(2)
+        array_val(iboolright_xi(ipoin)) = buffer_received_faces_scalar(ipoin)
+      enddo
+    endif
+
+  endif
+
+!----
+!---- then assemble along eta using the 2-D topology
+!----
+
+! assemble along eta only if more than one slice
+  if(NPROC_ETA > 1) then
+
+    ! slices copy the right face into the buffer
+    do ipoin=1,npoin2D_eta(2)
+      buffer_send_faces_scalar(ipoin) = array_val(iboolright_eta(ipoin))
+    enddo
+
+    ! send messages forward along each row
+    if(iproc_eta == 0) then
+      sender = MPI_PROC_NULL
+    else
+      sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
+    endif
+    if(iproc_eta == NPROC_ETA-1) then
+      receiver = MPI_PROC_NULL
+    else
+      receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
+    endif
+    call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,receiver, &
+      itag2,buffer_received_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,sender, &
+      itag,MPI_COMM_WORLD,msg_status,ier)
+
+    ! all slices add the buffer received to the contributions on the left face
+    if(iproc_eta > 0) then
+      do ipoin=1,npoin2D_eta(1)
+        array_val(iboolleft_eta(ipoin)) = array_val(iboolleft_eta(ipoin)) + &
+                                buffer_received_faces_scalar(ipoin)
+      enddo
+    endif
+
+    ! the contributions are correctly assembled on the left side of each slice
+    ! now we have to send the result back to the sender
+    ! all slices copy the left face into the buffer
+    do ipoin=1,npoin2D_eta(1)
+      buffer_send_faces_scalar(ipoin) = array_val(iboolleft_eta(ipoin))
+    enddo
+
+    ! send messages backward along each row
+    if(iproc_eta == NPROC_ETA-1) then
+      sender = MPI_PROC_NULL
+    else
+      sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
+    endif
+    if(iproc_eta == 0) then
+      receiver = MPI_PROC_NULL
+    else
+      receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
+    endif
+    call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,receiver, &
+      itag2,buffer_received_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,sender, &
+      itag,MPI_COMM_WORLD,msg_status,ier)
+
+    ! all slices copy the buffer received to the contributions on the right face
+    if(iproc_eta < NPROC_ETA-1) then
+      do ipoin=1,npoin2D_eta(2)
+        array_val(iboolright_eta(ipoin)) = buffer_received_faces_scalar(ipoin)
+      enddo
+    endif
+
+  endif
+
+!----
+!---- start MPI assembling phase between chunks
+!----
+
+! check flag to see if we need to assemble (might be turned off when debugging)
+! and do not assemble if only one chunk
+  if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) return
+
+! ***************************************************************
+!  transmit messages in forward direction (iprocfrom -> iprocto)
+! ***************************************************************
+
+!---- put slices in receive mode
+!---- a given slice can belong to at most two faces
+
+! use a three-step scheme that can never deadlock
+! scheme for faces cannot deadlock even if NPROC_XI = NPROC_ETA = 1
+  do imsg_loop = 1,NUM_MSG_TYPES
+
+    icount_faces = 0
+    do imsg = 1,NUMMSGS_FACES
+      if(myrank==iprocfrom_faces(imsg) .or. &
+           myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+      if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+        sender = iprocfrom_faces(imsg)
+        npoin2D_chunks = npoin2D_faces(icount_faces)
+        call MPI_RECV(buffer_received_faces_scalar, &
+                  npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
+                  itag,MPI_COMM_WORLD,msg_status,ier)
+        do ipoin2D=1,npoin2D_chunks
+          array_val(iboolfaces(ipoin2D,icount_faces)) = &
+             array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D)
+        enddo
+      endif
+    enddo
+
+    !---- put slices in send mode
+    !---- a given slice can belong to at most two faces
+    icount_faces = 0
+    do imsg = 1,NUMMSGS_FACES
+      if(myrank==iprocfrom_faces(imsg) .or. &
+           myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+      if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+        receiver = iprocto_faces(imsg)
+        npoin2D_chunks = npoin2D_faces(icount_faces)
+        do ipoin2D=1,npoin2D_chunks
+          buffer_send_faces_scalar(ipoin2D) = array_val(iboolfaces(ipoin2D,icount_faces))
+        enddo
+        call MPI_SEND(buffer_send_faces_scalar,npoin2D_chunks, &
+                  CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+      endif
+    enddo
+
+    ! *********************************************************************
+    !  transmit messages back in opposite direction (iprocto -> iprocfrom)
+    ! *********************************************************************
+
+    !---- put slices in receive mode
+    !---- a given slice can belong to at most two faces
+
+    icount_faces = 0
+    do imsg = 1,NUMMSGS_FACES
+      if(myrank==iprocfrom_faces(imsg) .or. &
+           myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+      if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+        sender = iprocto_faces(imsg)
+        npoin2D_chunks = npoin2D_faces(icount_faces)
+        call MPI_RECV(buffer_received_faces_scalar, &
+                  npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
+                  itag,MPI_COMM_WORLD,msg_status,ier)
+        do ipoin2D=1,npoin2D_chunks
+          array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D)
+        enddo
+      endif
+    enddo
+
+    !---- put slices in send mode
+    !---- a given slice can belong to at most two faces
+    icount_faces = 0
+    do imsg = 1,NUMMSGS_FACES
+      if(myrank==iprocfrom_faces(imsg) .or. &
+           myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+      if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+        receiver = iprocfrom_faces(imsg)
+        npoin2D_chunks = npoin2D_faces(icount_faces)
+        do ipoin2D=1,npoin2D_chunks
+          buffer_send_faces_scalar(ipoin2D) = array_val(iboolfaces(ipoin2D,icount_faces))
+        enddo
+        call MPI_SEND(buffer_send_faces_scalar,npoin2D_chunks, &
+                  CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+      endif
+    enddo
+
+! end of anti-deadlocking loop
+  enddo
+
+!----
+!---- start MPI assembling corners
+!----
+
+! scheme for corners cannot deadlock even if NPROC_XI = NPROC_ETA = 1
+
+! ***************************************************************
+!  transmit messages in forward direction (two workers -> master)
+! ***************************************************************
+
+  icount_corners = 0
+
+  do imsg = 1,NCORNERSCHUNKS
+
+    if(myrank == iproc_master_corners(imsg) .or. &
+       myrank == iproc_worker1_corners(imsg) .or. &
+       (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
+
+    !---- receive messages from the two workers on the master
+    if(myrank==iproc_master_corners(imsg)) then
+
+      ! receive from worker #1 and add to local array
+      sender = iproc_worker1_corners(imsg)
+      call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+            CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+      do ipoin1D=1,NGLOB1D_RADIAL
+        array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
+                 buffer_recv_chunkcorn_scalar(ipoin1D)
+      enddo
+
+      ! receive from worker #2 and add to local array
+      if(NCHUNKS /= 2) then
+        sender = iproc_worker2_corners(imsg)
+        call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+              CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+        do ipoin1D=1,NGLOB1D_RADIAL
+          array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
+                   buffer_recv_chunkcorn_scalar(ipoin1D)
+        enddo
+      endif
+
+    endif
+
+    !---- send messages from the two workers to the master
+    if(myrank==iproc_worker1_corners(imsg) .or. &
+                (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+
+      receiver = iproc_master_corners(imsg)
+      do ipoin1D=1,NGLOB1D_RADIAL
+        buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
+      enddo
+      call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+                receiver,itag,MPI_COMM_WORLD,ier)
+
+    endif
+
+    ! *********************************************************************
+    !  transmit messages back in opposite direction (master -> two workers)
+    ! *********************************************************************
+
+    !---- receive messages from the master on the two workers
+    if(myrank==iproc_worker1_corners(imsg) .or. &
+                (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+
+      ! receive from master and copy to local array
+      sender = iproc_master_corners(imsg)
+      call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+            CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+      do ipoin1D=1,NGLOB1D_RADIAL
+        array_val(iboolcorner(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_scalar(ipoin1D)
+      enddo
+
+    endif
+
+    !---- send messages from the master to the two workers
+    if(myrank==iproc_master_corners(imsg)) then
+
+      do ipoin1D=1,NGLOB1D_RADIAL
+        buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
+      enddo
+
+      ! send to worker #1
+      receiver = iproc_worker1_corners(imsg)
+      call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+                receiver,itag,MPI_COMM_WORLD,ier)
+
+      ! send to worker #2
+      if(NCHUNKS /= 2) then
+        receiver = iproc_worker2_corners(imsg)
+        call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+                  receiver,itag,MPI_COMM_WORLD,ier)
+      endif
+
+    endif
+
+  enddo
+
+  end subroutine assemble_MPI_scalar_block
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,array_val, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        my_neighbours_ext_mesh)
+
+! uses non-blocking send/receive internally and waits for completion,
+! so the routine returns only after the assembly is done
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: NPROC
+  integer :: NGLOB_AB
+
+  ! array to assemble
+  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
+
+  integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+  integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+  integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+
+  ! local parameters
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_ext_mesh
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_recv_scalar_ext_mesh
+  integer, dimension(:), allocatable :: request_send_scalar_ext_mesh
+  integer, dimension(:), allocatable :: request_recv_scalar_ext_mesh
+
+
+  integer ipoin,iinterface,ier
+
+! here we have to assemble all the contributions between partitions using MPI
+
+  ! assemble only if more than one partition
+  if(NPROC > 1) then
+
+    allocate(buffer_send_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array buffer_send_scalar_ext_mesh'
+    allocate(buffer_recv_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array buffer_recv_scalar_ext_mesh'
+    allocate(request_send_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array request_send_scalar_ext_mesh'
+    allocate(request_recv_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array request_recv_scalar_ext_mesh'
+
+    ! partition border copy into the buffer
+    do iinterface = 1, num_interfaces_ext_mesh
+      do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+        buffer_send_scalar_ext_mesh(ipoin,iinterface) = array_val(ibool_interfaces_ext_mesh(ipoin,iinterface))
+      enddo
+    enddo
+
+    ! send messages
+    do iinterface = 1, num_interfaces_ext_mesh
+      ! non-blocking synchronous send request
+      call isend_cr(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+           nibool_interfaces_ext_mesh(iinterface), &
+           my_neighbours_ext_mesh(iinterface), &
+           itag, &
+           request_send_scalar_ext_mesh(iinterface) &
+           )
+      ! receive request
+      call irecv_cr(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+           nibool_interfaces_ext_mesh(iinterface), &
+           my_neighbours_ext_mesh(iinterface), &
+           itag, &
+           request_recv_scalar_ext_mesh(iinterface) &
+           )
+    enddo
+
+    ! wait for communications completion (recv)
+    do iinterface = 1, num_interfaces_ext_mesh
+      call wait_req(request_recv_scalar_ext_mesh(iinterface))
+    enddo
+
+    ! adding contributions of neighbours
+    do iinterface = 1, num_interfaces_ext_mesh
+      do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+        array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+             array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
+      enddo
+    enddo
+
+    ! wait for communications completion (send)
+    do iinterface = 1, num_interfaces_ext_mesh
+      call wait_req(request_send_scalar_ext_mesh(iinterface))
+    enddo
+
+    deallocate(buffer_send_scalar_ext_mesh)
+    deallocate(buffer_recv_scalar_ext_mesh)
+    deallocate(request_send_scalar_ext_mesh)
+    deallocate(request_recv_scalar_ext_mesh)
+
+  endif
+
+  end subroutine assemble_MPI_scalar_ext_mesh
+
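
The routine above allocates its message buffers internally, posts all sends and
receives, waits on the receives before summing the neighbour contributions, and
waits on the sends before returning, so the caller sees a fully assembled field
on exit. A hypothetical call site, written with the dummy-argument names of the
routine itself (the actual caller is not part of this commit):

  ! assemble a scalar field across the partition interfaces of this slice
  call assemble_MPI_scalar_ext_mesh(NPROC, NGLOB_AB, array_val, &
                                    num_interfaces_ext_mesh, max_nibool_interfaces_ext_mesh, &
                                    nibool_interfaces_ext_mesh, ibool_interfaces_ext_mesh, &
                                    my_neighbours_ext_mesh)
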

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_vector_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_vector_mesh.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/assemble_MPI_vector_mesh.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,125 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+  subroutine assemble_MPI_vector_ext_mesh(NPROC,NGLOB_AB,array_val, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        my_neighbours_ext_mesh)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: NPROC
+  integer :: NGLOB_AB
+
+! array to assemble
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_AB) :: array_val
+
+  integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+  integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+  integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
+
+  ! local parameters
+
+  ! send/receive temporary buffers
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_vector_ext_mesh
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_recv_vector_ext_mesh
+
+  ! requests
+  integer, dimension(:), allocatable :: request_send_vector_ext_mesh
+  integer, dimension(:), allocatable :: request_recv_vector_ext_mesh
+
+  integer ipoin,iinterface,ier
+
+
+! here we have to assemble all the contributions between partitions using MPI
+
+  ! assemble only if more than one partition
+  if(NPROC > 1) then
+
+    allocate(buffer_send_vector_ext_mesh(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array buffer_send_vector_ext_mesh'
+    allocate(buffer_recv_vector_ext_mesh(NDIM,max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array buffer_recv_vector_ext_mesh'
+    allocate(request_send_vector_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array request_send_vector_ext_mesh'
+    allocate(request_recv_vector_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array request_recv_vector_ext_mesh'
+
+    ! partition border copy into the buffer
+    do iinterface = 1, num_interfaces_ext_mesh
+      do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+        buffer_send_vector_ext_mesh(:,ipoin,iinterface) = &
+          array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface))
+      enddo
+    enddo
+
+    ! send messages
+    do iinterface = 1, num_interfaces_ext_mesh
+      call isend_cr(buffer_send_vector_ext_mesh(1,1,iinterface), &
+           NDIM*nibool_interfaces_ext_mesh(iinterface), &
+           my_neighbours_ext_mesh(iinterface), &
+           itag, &
+           request_send_vector_ext_mesh(iinterface) &
+           )
+      call irecv_cr(buffer_recv_vector_ext_mesh(1,1,iinterface), &
+           NDIM*nibool_interfaces_ext_mesh(iinterface), &
+           my_neighbours_ext_mesh(iinterface), &
+           itag, &
+           request_recv_vector_ext_mesh(iinterface) &
+           )
+    enddo
+
+    ! wait for communications completion (recv)
+    do iinterface = 1, num_interfaces_ext_mesh
+      call wait_req(request_recv_vector_ext_mesh(iinterface))
+    enddo
+
+    ! adding contributions of neighbours
+    do iinterface = 1, num_interfaces_ext_mesh
+      do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+        array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+             array_val(:,ibool_interfaces_ext_mesh(ipoin,iinterface)) &
+             + buffer_recv_vector_ext_mesh(:,ipoin,iinterface)
+      enddo
+    enddo
+
+    ! wait for communications completion (send)
+    do iinterface = 1, num_interfaces_ext_mesh
+      call wait_req(request_send_vector_ext_mesh(iinterface))
+    enddo
+
+    deallocate(buffer_send_vector_ext_mesh)
+    deallocate(buffer_recv_vector_ext_mesh)
+    deallocate(request_send_vector_ext_mesh)
+    deallocate(request_recv_vector_ext_mesh)
+
+  endif
+
+  end subroutine assemble_MPI_vector_ext_mesh

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/compute_area.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/compute_area.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/compute_area.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -99,6 +99,4 @@
 
   endif
 
-
   end subroutine compute_area
-

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_MPI_interfaces.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_MPI_interfaces.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_MPI_interfaces.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,1640 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+module create_MPI_interfaces_par
+
+  use constants,only: CUSTOM_REAL,NUMFACES_SHARED,NB_SQUARE_EDGES_ONEDIR,NDIM,IMAIN
+  
+  ! indirect addressing for each message for faces and corners of the chunks
+  ! a given slice can belong to at most one corner and at most two faces
+  integer :: NGLOB2DMAX_XY
+
+  ! number of faces between chunks
+  integer :: NUMMSGS_FACES
+
+  ! number of corners between chunks
+  integer :: NCORNERSCHUNKS
+
+  ! number of message types
+  integer :: NUM_MSG_TYPES
+
+  integer :: NGLOB1D_RADIAL_CM 
+  integer :: NGLOB1D_RADIAL_OC
+  integer :: NGLOB1D_RADIAL_IC
+
+  integer :: NGLOB2DMAX_XMIN_XMAX_CM
+  integer :: NGLOB2DMAX_XMIN_XMAX_OC
+  integer :: NGLOB2DMAX_XMIN_XMAX_IC
+
+  integer :: NGLOB2DMAX_YMIN_YMAX_CM
+  integer :: NGLOB2DMAX_YMIN_YMAX_OC
+  integer :: NGLOB2DMAX_YMIN_YMAX_IC
+
+  integer :: NSPEC2DMAX_XMIN_XMAX_CM
+  integer :: NSPEC2DMAX_YMIN_YMAX_CM
+  integer :: NSPEC2D_BOTTOM_CM
+  integer :: NSPEC2D_TOP_CM
+
+  integer :: NSPEC2DMAX_XMIN_XMAX_IC
+  integer :: NSPEC2DMAX_YMIN_YMAX_IC
+  integer :: NSPEC2D_BOTTOM_IC
+  integer :: NSPEC2D_TOP_IC
+
+  integer :: NSPEC2DMAX_XMIN_XMAX_OC
+  integer :: NSPEC2DMAX_YMIN_YMAX_OC
+  integer :: NSPEC2D_BOTTOM_OC
+  integer :: NSPEC2D_TOP_OC
+
+  integer :: NSPEC_CRUST_MANTLE
+  integer :: NSPEC_INNER_CORE
+  integer :: NSPEC_OUTER_CORE
+
+  integer :: NGLOB_CRUST_MANTLE
+  integer :: NGLOB_INNER_CORE
+  integer :: NGLOB_OUTER_CORE
+  
+  !-----------------------------------------------------------------
+  ! assembly
+  !-----------------------------------------------------------------
+
+  ! ---- arrays to assemble between chunks
+  ! communication pattern for faces between chunks
+  integer, dimension(:),allocatable :: iprocfrom_faces,iprocto_faces,imsg_type
+  ! communication pattern for corners between chunks
+  integer, dimension(:),allocatable :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+  ! this for non blocking MPI
+
+  ! buffers for send and receive between faces of the slices and the chunks
+  ! we use the same buffers to assemble scalars and vectors because vectors are
+  ! always three times bigger and therefore scalars can use the first part
+  ! of the vector buffer in memory even if it has an additional index here
+  integer :: npoin2D_max_all_CM_IC
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_faces,buffer_received_faces
+
+  ! buffers for send and receive between corners of the chunks
+  real(kind=CUSTOM_REAL), dimension(:),allocatable :: &
+    buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar
+
+  ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+  real(kind=CUSTOM_REAL), dimension(:,:),allocatable :: &
+     buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector
+
+
+  ! collected MPI interfaces
+  ! MPI crust/mantle mesh
+  integer :: num_interfaces_crust_mantle
+  integer :: max_nibool_interfaces_crust_mantle
+  integer, dimension(:), allocatable :: my_neighbours_crust_mantle,nibool_interfaces_crust_mantle
+  integer, dimension(:,:), allocatable :: ibool_interfaces_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle
+
+  integer, dimension(:), allocatable :: request_send_vector_crust_mantle,request_recv_vector_crust_mantle
+
+  ! MPI inner core mesh
+  integer :: num_interfaces_inner_core
+  integer :: max_nibool_interfaces_inner_core
+  integer, dimension(:), allocatable :: my_neighbours_inner_core,nibool_interfaces_inner_core
+  integer, dimension(:,:), allocatable :: ibool_interfaces_inner_core
+
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_vector_inner_core,buffer_recv_vector_inner_core
+
+  integer, dimension(:), allocatable :: request_send_vector_inner_core,request_recv_vector_inner_core
+
+  ! MPI outer core mesh
+  integer :: num_interfaces_outer_core
+  integer :: max_nibool_interfaces_outer_core
+  integer, dimension(:), allocatable :: my_neighbours_outer_core,nibool_interfaces_outer_core
+  integer, dimension(:,:), allocatable :: ibool_interfaces_outer_core
+
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core
+
+  integer, dimension(:), allocatable :: request_send_scalar_outer_core,request_recv_scalar_outer_core
+
+  ! temporary arrays for elements on slices or edges
+  logical, dimension(:),allocatable :: is_on_a_slice_edge_crust_mantle, &
+    is_on_a_slice_edge_inner_core,is_on_a_slice_edge_outer_core
+
+  logical, dimension(:),allocatable :: mask_ibool
+
+  !--------------------------------------
+  ! crust mantle
+  !--------------------------------------
+  real(kind=CUSTOM_REAL), dimension(:),allocatable :: &
+    xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle
+  integer, dimension(:),allocatable :: idoubling_crust_mantle
+  integer, dimension(:,:,:,:),allocatable :: ibool_crust_mantle
+
+  ! assembly
+  integer :: npoin2D_faces_crust_mantle(NUMFACES_SHARED)
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle
+
+  ! indirect addressing for each corner of the chunks
+  integer, dimension(:,:),allocatable :: iboolcorner_crust_mantle
+
+  ! 2-D addressing and buffers for summation between slices
+  integer, dimension(:),allocatable :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
+  integer, dimension(:),allocatable :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
+
+  integer, dimension(:,:),allocatable :: iboolfaces_crust_mantle
+
+  ! inner / outer elements crust/mantle region
+  integer :: num_phase_ispec_crust_mantle
+  integer :: nspec_inner_crust_mantle,nspec_outer_crust_mantle
+  integer, dimension(:,:), allocatable :: phase_ispec_inner_crust_mantle
+
+  ! mesh coloring
+  integer :: num_colors_outer_crust_mantle,num_colors_inner_crust_mantle
+  integer,dimension(:),allocatable :: num_elem_colors_crust_mantle
+
+  !--------------------------------------
+  ! outer core
+  !--------------------------------------
+  real(kind=CUSTOM_REAL), dimension(:),allocatable :: &
+    xstore_outer_core,ystore_outer_core,zstore_outer_core
+  integer, dimension(:),allocatable :: idoubling_outer_core
+  integer, dimension(:,:,:,:),allocatable :: ibool_outer_core
+  
+  ! assembly
+  integer :: npoin2D_faces_outer_core(NUMFACES_SHARED)
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_outer_core,npoin2D_eta_outer_core
+
+  ! indirect addressing for each corner of the chunks
+  integer, dimension(:,:),allocatable :: iboolcorner_outer_core
+
+  ! 2-D addressing and buffers for summation between slices
+  integer, dimension(:),allocatable :: iboolleft_xi_outer_core,iboolright_xi_outer_core
+  integer, dimension(:),allocatable :: iboolleft_eta_outer_core,iboolright_eta_outer_core
+
+  integer, dimension(:,:),allocatable :: iboolfaces_outer_core
+
+  ! inner / outer elements outer core region
+  integer :: num_phase_ispec_outer_core
+  integer :: nspec_inner_outer_core,nspec_outer_outer_core
+  integer, dimension(:,:), allocatable :: phase_ispec_inner_outer_core
+
+  ! mesh coloring
+  integer :: num_colors_outer_outer_core,num_colors_inner_outer_core
+  integer,dimension(:),allocatable :: num_elem_colors_outer_core
+
+
+  !--------------------------------------
+  ! inner core
+  !--------------------------------------
+
+  real(kind=CUSTOM_REAL), dimension(:),allocatable :: &
+    xstore_inner_core,ystore_inner_core,zstore_inner_core
+  integer, dimension(:),allocatable :: idoubling_inner_core
+  integer, dimension(:,:,:,:),allocatable :: ibool_inner_core
+
+
+  ! for matching with central cube in inner core
+  integer, dimension(:), allocatable :: sender_from_slices_to_cube
+  integer, dimension(:,:), allocatable :: ibool_central_cube
+  double precision, dimension(:,:), allocatable :: buffer_slices,buffer_slices2
+  double precision, dimension(:,:,:), allocatable :: buffer_all_cube_from_slices
+  integer nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,receiver_cube_from_slices
+
+  integer :: nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
+            nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
+
+  integer, dimension(:),allocatable :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
+  integer, dimension(:),allocatable :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
+  integer, dimension(:),allocatable :: ibelm_bottom_inner_core
+  integer, dimension(:),allocatable :: ibelm_top_inner_core
+
+  integer :: npoin2D_faces_inner_core(NUMFACES_SHARED)
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_inner_core,npoin2D_eta_inner_core
+
+  ! indirect addressing for each corner of the chunks
+  integer, dimension(:,:),allocatable :: iboolcorner_inner_core
+
+  ! 2-D addressing and buffers for summation between slices
+  integer, dimension(:),allocatable :: iboolleft_xi_inner_core,iboolright_xi_inner_core
+  integer, dimension(:),allocatable :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+
+  integer, dimension(:,:),allocatable :: iboolfaces_inner_core
+
+  ! inner / outer elements inner core region
+  integer :: num_phase_ispec_inner_core
+  integer :: nspec_inner_inner_core,nspec_outer_inner_core
+  integer, dimension(:,:), allocatable :: phase_ispec_inner_inner_core
+
+  ! mesh coloring
+  integer :: num_colors_outer_inner_core,num_colors_inner_inner_core
+  integer,dimension(:),allocatable :: num_elem_colors_inner_core
+  
+end module create_MPI_interfaces_par
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine create_MPI_interfaces()
+
+  use meshfem3D_par
+  use create_MPI_interfaces_par
+  implicit none
+  
+  ! sets up arrays
+  call cmi_read_addressing()
+
+  ! reads "iboolleft_..txt", "iboolright_..txt" (and "list_messages_..txt", "buffer_...txt") files and sets up MPI buffers
+  call cmi_read_buffers()
+
+  ! sets up MPI interfaces
+  call cmi_setup_MPIinterfaces()
+
+  ! sets up inner/outer element arrays
+  call cmi_setup_InnerOuter()
+
+  ! sets up mesh coloring
+  call cmi_setup_color_perm()
+  
+  ! saves interface infos
+  call cmi_save_interfaces()
+    
+  ! frees memory
+  call cmi_free_arrays()
+
+  end subroutine create_MPI_interfaces
+  
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine cmi_read_addressing()
+
+  use meshfem3D_par
+  use create_MPI_interfaces_par
+  implicit none
+
+  ! local parameters
+  integer :: NUM_FACES,NPROC_ONE_DIRECTION
+  integer :: ier
+  
+  ! define maximum size for message buffers
+  ! use the number of surface points found in the crust/mantle since it is the largest region
+  NGLOB2DMAX_XY = max(NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE))
+
+  ! number of corners and faces shared between chunks and number of message types
+  if(NCHUNKS == 1 .or. NCHUNKS == 2) then
+    NCORNERSCHUNKS = 1
+    NUM_FACES = 1
+    NUM_MSG_TYPES = 1
+  else if(NCHUNKS == 3) then
+    NCORNERSCHUNKS = 1
+    NUM_FACES = 1
+    NUM_MSG_TYPES = 3
+  else if(NCHUNKS == 6) then
+    NCORNERSCHUNKS = 8
+    NUM_FACES = 4
+    NUM_MSG_TYPES = 3
+  else
+    call exit_MPI(myrank,'number of chunks must be either 1, 2, 3 or 6')
+  endif
+  ! if more than one chunk then same number of processors in each direction
+  NPROC_ONE_DIRECTION = NPROC_XI
+  ! total number of messages corresponding to these common faces
+  NUMMSGS_FACES = NPROC_ONE_DIRECTION*NUM_FACES*NUM_MSG_TYPES
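+  ! for instance, a 6-chunk run with NPROC_XI = NPROC_ETA = 4 yields
+  ! NUMMSGS_FACES = 4 * 4 * 3 = 48 face messages in total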
+
+
+  allocate(iprocfrom_faces(NUMMSGS_FACES), &
+          iprocto_faces(NUMMSGS_FACES), &
+          imsg_type(NUMMSGS_FACES),stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating iproc faces arrays')
+  
+  ! communication pattern for corners between chunks
+  allocate(iproc_master_corners(NCORNERSCHUNKS), &
+          iproc_worker1_corners(NCORNERSCHUNKS), &
+          iproc_worker2_corners(NCORNERSCHUNKS),stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating iproc corner arrays')
+  
+
+  ! parameters from header file
+  NGLOB1D_RADIAL_CM = NGLOB1D_RADIAL(IREGION_CRUST_MANTLE)
+  NGLOB1D_RADIAL_OC = NGLOB1D_RADIAL(IREGION_OUTER_CORE)
+  NGLOB1D_RADIAL_IC = NGLOB1D_RADIAL(IREGION_INNER_CORE)
+
+  NGLOB2DMAX_XMIN_XMAX_CM = NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE)
+  NGLOB2DMAX_XMIN_XMAX_OC = NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE)
+  NGLOB2DMAX_XMIN_XMAX_IC = NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE)
+
+  NGLOB2DMAX_YMIN_YMAX_CM = NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE)
+  NGLOB2DMAX_YMIN_YMAX_OC = NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE)
+  NGLOB2DMAX_YMIN_YMAX_IC = NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE)
+
+  NSPEC2DMAX_XMIN_XMAX_CM = NSPEC2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE)
+  NSPEC2DMAX_YMIN_YMAX_CM = NSPEC2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE)
+  NSPEC2D_BOTTOM_CM = NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE)
+  NSPEC2D_TOP_CM = NSPEC2D_TOP(IREGION_CRUST_MANTLE)
+
+  NSPEC2DMAX_XMIN_XMAX_IC = NSPEC2DMAX_XMIN_XMAX(IREGION_INNER_CORE)
+  NSPEC2DMAX_YMIN_YMAX_IC = NSPEC2DMAX_YMIN_YMAX(IREGION_INNER_CORE)
+  NSPEC2D_BOTTOM_IC = NSPEC2D_BOTTOM(IREGION_INNER_CORE)
+  NSPEC2D_TOP_IC = NSPEC2D_TOP(IREGION_INNER_CORE)
+
+  NSPEC2DMAX_XMIN_XMAX_OC = NSPEC2DMAX_XMIN_XMAX(IREGION_OUTER_CORE)
+  NSPEC2DMAX_YMIN_YMAX_OC = NSPEC2DMAX_YMIN_YMAX(IREGION_OUTER_CORE)
+  NSPEC2D_BOTTOM_OC = NSPEC2D_BOTTOM(IREGION_OUTER_CORE)
+  NSPEC2D_TOP_OC = NSPEC2D_TOP(IREGION_OUTER_CORE)
+
+  NSPEC_CRUST_MANTLE = NSPEC(IREGION_CRUST_MANTLE)
+  NSPEC_INNER_CORE = NSPEC(IREGION_INNER_CORE)
+  NSPEC_OUTER_CORE = NSPEC(IREGION_OUTER_CORE)
+
+  NGLOB_CRUST_MANTLE = NGLOB(IREGION_CRUST_MANTLE)
+  NGLOB_INNER_CORE = NGLOB(IREGION_INNER_CORE)
+  NGLOB_OUTER_CORE = NGLOB(IREGION_OUTER_CORE)
+
+  ! allocates arrays
+
+  allocate(buffer_send_chunkcorn_scalar(NGLOB1D_RADIAL_CM), &
+          buffer_recv_chunkcorn_scalar(NGLOB1D_RADIAL_CM))
+
+  allocate(buffer_send_chunkcorn_vector(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC), &
+          buffer_recv_chunkcorn_vector(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC))
+
+  ! crust mantle
+  allocate(iboolcorner_crust_mantle(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED))
+  allocate(iboolleft_xi_crust_mantle(NGLOB2DMAX_XMIN_XMAX_CM), &
+          iboolright_xi_crust_mantle(NGLOB2DMAX_XMIN_XMAX_CM))          
+  allocate(iboolleft_eta_crust_mantle(NGLOB2DMAX_YMIN_YMAX_CM), &
+          iboolright_eta_crust_mantle(NGLOB2DMAX_YMIN_YMAX_CM))
+  allocate(iboolfaces_crust_mantle(NGLOB2DMAX_XY,NUMFACES_SHARED))
+
+  ! outer core
+  allocate(iboolcorner_outer_core(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED))
+  allocate(iboolleft_xi_outer_core(NGLOB2DMAX_XMIN_XMAX_OC), &
+          iboolright_xi_outer_core(NGLOB2DMAX_XMIN_XMAX_OC))
+  allocate(iboolleft_eta_outer_core(NGLOB2DMAX_YMIN_YMAX_OC), &
+          iboolright_eta_outer_core(NGLOB2DMAX_YMIN_YMAX_OC))
+  allocate(iboolfaces_outer_core(NGLOB2DMAX_XY,NUMFACES_SHARED))
+
+  ! inner core
+  allocate(ibelm_xmin_inner_core(NSPEC2DMAX_XMIN_XMAX_IC), &
+          ibelm_xmax_inner_core(NSPEC2DMAX_XMIN_XMAX_IC))
+  allocate(ibelm_ymin_inner_core(NSPEC2DMAX_YMIN_YMAX_IC), &
+          ibelm_ymax_inner_core(NSPEC2DMAX_YMIN_YMAX_IC))
+  allocate(ibelm_bottom_inner_core(NSPEC2D_BOTTOM_IC))
+  allocate(ibelm_top_inner_core(NSPEC2D_TOP_IC))
+
+
+  allocate(iboolcorner_inner_core(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED))
+  allocate(iboolleft_xi_inner_core(NGLOB2DMAX_XMIN_XMAX_IC), &
+          iboolright_xi_inner_core(NGLOB2DMAX_XMIN_XMAX_IC))
+  allocate(iboolleft_eta_inner_core(NGLOB2DMAX_YMIN_YMAX_IC), &
+          iboolright_eta_inner_core(NGLOB2DMAX_YMIN_YMAX_IC))
+  allocate(iboolfaces_inner_core(NGLOB2DMAX_XY,NUMFACES_SHARED))
+
+
+  ! crust mantle
+  allocate(xstore_crust_mantle(NGLOB_CRUST_MANTLE), &
+          ystore_crust_mantle(NGLOB_CRUST_MANTLE), &
+          zstore_crust_mantle(NGLOB_CRUST_MANTLE))
+  allocate(idoubling_crust_mantle(NSPEC_CRUST_MANTLE))
+  allocate(ibool_crust_mantle(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE), &
+           stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating temporary crust mantle arrays')
+
+  ! outer core
+  allocate(xstore_outer_core(NGLOB_OUTER_CORE), &
+          ystore_outer_core(NGLOB_OUTER_CORE), &
+          zstore_outer_core(NGLOB_OUTER_CORE))
+  allocate(idoubling_outer_core(NSPEC_OUTER_CORE))
+  allocate(ibool_outer_core(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE), &
+           stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating temporary outer core arrays')
+
+  ! inner core
+  allocate(xstore_inner_core(NGLOB_INNER_CORE), &
+          ystore_inner_core(NGLOB_INNER_CORE), &
+          zstore_inner_core(NGLOB_INNER_CORE))
+  allocate(idoubling_inner_core(NSPEC_INNER_CORE))
+  allocate(ibool_inner_core(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE), &
+           stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating temporary inner core arrays')
+
+  ! allocates temporary arrays
+  allocate(mask_ibool(NGLOB_CRUST_MANTLE))
+  allocate( is_on_a_slice_edge_crust_mantle(NSPEC_CRUST_MANTLE), &
+           is_on_a_slice_edge_inner_core(NSPEC_INNER_CORE), &
+           is_on_a_slice_edge_outer_core(NSPEC_OUTER_CORE), &
+           stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating temporary is_on_a_slice_edge arrays')
+          
+  
+  ! read coordinates of the mesh
+  ! crust mantle
+  ibool_crust_mantle(:,:,:,:) = -1
+  call cmi_read_solver_data(myrank,IREGION_CRUST_MANTLE, &
+                           NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
+                           xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle,&
+                           ibool_crust_mantle,idoubling_crust_mantle, &
+                           is_on_a_slice_edge_crust_mantle, &
+                           LOCAL_PATH)
+
+  ! check that the number of points in this slice is correct
+  if(minval(ibool_crust_mantle(:,:,:,:)) /= 1 .or. &
+    maxval(ibool_crust_mantle(:,:,:,:)) /= NGLOB_CRUST_MANTLE) &
+      call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in crust and mantle')
+
+  ! outer core
+  ibool_outer_core(:,:,:,:) = -1  
+  call cmi_read_solver_data(myrank,IREGION_OUTER_CORE, &
+                           NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
+                           xstore_outer_core,ystore_outer_core,zstore_outer_core,&
+                           ibool_outer_core,idoubling_outer_core, &
+                           is_on_a_slice_edge_outer_core, &
+                           LOCAL_PATH)
+
+  ! check that the number of points in this slice is correct
+  if(minval(ibool_outer_core(:,:,:,:)) /= 1 .or. &
+     maxval(ibool_outer_core(:,:,:,:)) /= NGLOB_OUTER_CORE) &
+    call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in outer core')
+
+  ! inner core
+  ibool_inner_core(:,:,:,:) = -1  
+  call cmi_read_solver_data(myrank,IREGION_INNER_CORE, &
+                           NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+                           xstore_inner_core,ystore_inner_core,zstore_inner_core,&
+                           ibool_inner_core,idoubling_inner_core, &
+                           is_on_a_slice_edge_inner_core, &
+                           LOCAL_PATH)
+
+  ! check that the number of points in this slice is correct
+  if(minval(ibool_inner_core(:,:,:,:)) /= 1 .or. maxval(ibool_inner_core(:,:,:,:)) /= NGLOB_INNER_CORE) &
+    call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in inner core')
+
+  ! synchronize processes
+  call sync_all()
+  
+  end subroutine cmi_read_addressing  
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine cmi_read_buffers()
+
+  use meshfem3D_par
+  use create_MPI_interfaces_par
+  implicit none
+
+  ! local parameters
+  integer :: ier
+  integer njunk1,njunk2
+  character(len=150) prname
+  ! debug
+  logical,parameter :: DEBUG_FLAGS = .false.
+  character(len=150) :: filename
+
+  ! read 2-D addressing for summation between slices with MPI
+
+  ! mantle and crust
+  if(myrank == 0) then
+    write(IMAIN,*) 
+    write(IMAIN,*) 'crust/mantle region:'
+  endif
+
+  call read_arrays_buffers_mesher(IREGION_CRUST_MANTLE,myrank,iboolleft_xi_crust_mantle, &
+             iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+             npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+             iprocfrom_faces,iprocto_faces,imsg_type, &
+             iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+             iboolfaces_crust_mantle,npoin2D_faces_crust_mantle, &
+             iboolcorner_crust_mantle, &
+             NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE), &
+             NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_XY,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
+             NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT,NPROC_XI,NPROC_ETA,LOCAL_PATH,NCHUNKS)
+
+  ! outer core
+  if(myrank == 0) write(IMAIN,*) 'outer core region:'
+
+  call read_arrays_buffers_mesher(IREGION_OUTER_CORE,myrank, &
+             iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
+             npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
+             iprocfrom_faces,iprocto_faces,imsg_type, &
+             iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+             iboolfaces_outer_core,npoin2D_faces_outer_core, &
+             iboolcorner_outer_core, &
+             NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE), &
+             NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE),NGLOB2DMAX_XY,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
+             NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT,NPROC_XI,NPROC_ETA,LOCAL_PATH,NCHUNKS)
+
+  ! inner core
+  if(myrank == 0) write(IMAIN,*) 'inner core region:'
+
+  call read_arrays_buffers_mesher(IREGION_INNER_CORE,myrank, &
+             iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+             npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+             iprocfrom_faces,iprocto_faces,imsg_type, &
+             iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+             iboolfaces_inner_core,npoin2D_faces_inner_core, &
+             iboolcorner_inner_core, &
+             NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE), &
+             NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE),NGLOB2DMAX_XY,NGLOB1D_RADIAL(IREGION_INNER_CORE), &
+             NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT,NPROC_XI,NPROC_ETA,LOCAL_PATH,NCHUNKS)
+
+  ! synchronizes processes
+  call sync_all()
+
+  ! read coupling arrays for inner core
+  ! create name of database
+  call create_name_database(prname,myrank,IREGION_INNER_CORE,LOCAL_PATH)
+
+  ! read info for vertical edges for central cube matching in inner core
+  open(unit=IIN,file=prname(1:len_trim(prname))//'boundary.bin', &
+        status='old',form='unformatted',action='read',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening boundary.bin file')
+  
+  read(IIN) nspec2D_xmin_inner_core
+  read(IIN) nspec2D_xmax_inner_core
+  read(IIN) nspec2D_ymin_inner_core
+  read(IIN) nspec2D_ymax_inner_core
+  read(IIN) njunk1
+  read(IIN) njunk2
+
+  ! boundary parameters
+  read(IIN) ibelm_xmin_inner_core
+  read(IIN) ibelm_xmax_inner_core
+  read(IIN) ibelm_ymin_inner_core
+  read(IIN) ibelm_ymax_inner_core
+  read(IIN) ibelm_bottom_inner_core
+  read(IIN) ibelm_top_inner_core
+  close(IIN)
+
+
+  ! added this to reduce the size of the buffers
+  ! the buffer size is the sum of the two region sizes because we handle two regions in the same MPI call
+  npoin2D_max_all_CM_IC = max(maxval(npoin2D_xi_crust_mantle(:) + npoin2D_xi_inner_core(:)), &
+                        maxval(npoin2D_eta_crust_mantle(:) + npoin2D_eta_inner_core(:)))
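+  ! for illustration only (hypothetical values): with npoin2D_xi_crust_mantle = (/1200,1200/) and
+  ! npoin2D_xi_inner_core = (/300,300/), the xi term gives 1500; the final value is the larger of
+  ! the xi and eta terms, so a single buffer can serve the combined crust_mantle/inner_core exchange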
+
+  allocate(buffer_send_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED), &
+          buffer_received_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED),stat=ier)
+  if( ier /= 0 ) call exit_MPI(myrank,'error allocating mpi buffer')
+
+  ! central cube buffers
+  if(INCLUDE_CENTRAL_CUBE) then
+
+    if(myrank == 0) then
+      write(IMAIN,*)
+      write(IMAIN,*) 'including central cube'
+    endif
+    call sync_all()
+    
+    ! compute number of messages to expect in cube as well as their size
+    call comp_central_cube_buffer_size(iproc_xi,iproc_eta,ichunk, &
+                NPROC_XI,NPROC_ETA,NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
+                nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
+
+    ! this value is used for dynamic memory allocation, therefore make sure it is never zero
+    if(nb_msgs_theor_in_cube > 0) then
+      non_zero_nb_msgs_theor_in_cube = nb_msgs_theor_in_cube
+    else
+      non_zero_nb_msgs_theor_in_cube = 1
+    endif
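+    ! (the guard above is simply max(1,nb_msgs_theor_in_cube), kept explicit for readability)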
+
+    ! allocate buffers for cube and slices
+    allocate(sender_from_slices_to_cube(non_zero_nb_msgs_theor_in_cube), &
+            buffer_all_cube_from_slices(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM), &
+            buffer_slices(npoin2D_cube_from_slices,NDIM), &
+            buffer_slices2(npoin2D_cube_from_slices,NDIM), &
+            ibool_central_cube(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices),stat=ier)
+    if( ier /= 0 ) call exit_MPI(myrank,'error allocating cube buffers')
+
+    ! handles the communications with the central cube if it was included in the mesh
+    ! create buffers to assemble with the central cube
+    call create_central_cube_buffers(myrank,iproc_xi,iproc_eta,ichunk, &
+               NPROC_XI,NPROC_ETA,NCHUNKS, &
+               NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+               NSPEC2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NSPEC2DMAX_YMIN_YMAX(IREGION_INNER_CORE), &
+               NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
+               addressing,ibool_inner_core,idoubling_inner_core, &
+               xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+               nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
+               nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
+               ibelm_xmin_inner_core,ibelm_xmax_inner_core, &
+               ibelm_ymin_inner_core,ibelm_ymax_inner_core,ibelm_bottom_inner_core, &
+               nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices, &
+               receiver_cube_from_slices,sender_from_slices_to_cube,ibool_central_cube, &
+               buffer_slices,buffer_slices2,buffer_all_cube_from_slices)
+
+    if(myrank == 0) write(IMAIN,*) ''
+
+  else
+
+    ! allocate fictitious buffers for cube and slices with a dummy size
+    ! just to be able to use them as arguments in subroutine calls
+    allocate(sender_from_slices_to_cube(1), &
+            buffer_all_cube_from_slices(1,1,1), &
+            buffer_slices(1,1), &
+            buffer_slices2(1,1), &
+            ibool_central_cube(1,1),stat=ier)
+    if( ier /= 0 ) call exit_MPI(myrank,'error allocating dummy buffers')
+
+  endif
+
+  ! note: the fix_... routines below update the is_on_a_slice_edge_.. arrays:
+  !          they assign a flag to each element which lies on a rim of the slice,
+  !          thus they also include elements on top and bottom which are not shared with other MPI partitions
+  !
+  !          we will re-set these flags when setting up inner/outer elements, but use these arrays
+  !          for now as an initial guess in the search for elements which share a global point
+  !          between different MPI processes
+  call fix_non_blocking_slices(is_on_a_slice_edge_crust_mantle,iboolright_xi_crust_mantle, &
+         iboolleft_xi_crust_mantle,iboolright_eta_crust_mantle,iboolleft_eta_crust_mantle, &
+         npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle,ibool_crust_mantle, &
+         mask_ibool,NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,NGLOB2DMAX_XMIN_XMAX_CM,NGLOB2DMAX_YMIN_YMAX_CM)
+
+  call fix_non_blocking_slices(is_on_a_slice_edge_outer_core,iboolright_xi_outer_core, &
+         iboolleft_xi_outer_core,iboolright_eta_outer_core,iboolleft_eta_outer_core, &
+         npoin2D_xi_outer_core,npoin2D_eta_outer_core,ibool_outer_core, &
+         mask_ibool,NSPEC_OUTER_CORE,NGLOB_OUTER_CORE,NGLOB2DMAX_XMIN_XMAX_OC,NGLOB2DMAX_YMIN_YMAX_OC)
+
+  call fix_non_blocking_slices(is_on_a_slice_edge_inner_core,iboolright_xi_inner_core, &
+         iboolleft_xi_inner_core,iboolright_eta_inner_core,iboolleft_eta_inner_core, &
+         npoin2D_xi_inner_core,npoin2D_eta_inner_core,ibool_inner_core, &
+         mask_ibool,NSPEC_INNER_CORE,NGLOB_INNER_CORE,NGLOB2DMAX_XMIN_XMAX_IC,NGLOB2DMAX_YMIN_YMAX_IC)
+
+  if(INCLUDE_CENTRAL_CUBE) then
+    ! updates flags for elements on slice boundaries
+    call fix_non_blocking_central_cube(is_on_a_slice_edge_inner_core, &
+         ibool_inner_core,NSPEC_INNER_CORE,NGLOB_INNER_CORE,nb_msgs_theor_in_cube,ibelm_bottom_inner_core, &
+         idoubling_inner_core,npoin2D_cube_from_slices, &
+         ibool_central_cube,NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
+         ichunk,NPROC_XI)
+  endif
+
+  ! debug: saves element flags
+  if( DEBUG_FLAGS ) then
+    ! crust mantle
+    write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_crust_mantle_proc',myrank
+    call write_VTK_data_elem_l(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
+                              xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+                              ibool_crust_mantle, &
+                              is_on_a_slice_edge_crust_mantle,filename)
+    ! outer core
+    write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_outer_core_proc',myrank
+    call write_VTK_data_elem_l(NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
+                              xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+                              ibool_outer_core, &
+                              is_on_a_slice_edge_outer_core,filename)
+    ! inner core
+    write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_inner_core_proc',myrank
+    call write_VTK_data_elem_l(NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+                              xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+                              ibool_inner_core, &
+                              is_on_a_slice_edge_inner_core,filename)
+  endif
+
+  end subroutine cmi_read_buffers
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine cmi_setup_MPIinterfaces()
+
+  use meshfem3D_par
+  use create_MPI_interfaces_par
+  implicit none
+
+  include 'mpif.h'
+
+  ! local parameters
+  integer :: ier,ndim_assemble
+
+  ! temporary buffers for send and receive between faces of the slices and the chunks
+  real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC) ::  &
+    buffer_send_faces_scalar,buffer_received_faces_scalar
+
+  ! assigns initial maximum array sizes
+  ! for global slices, the maximum number of neighbours is around 17 (8 horizontal, max of 8 on bottom)
+  integer :: MAX_NEIGHBOURS
+  integer, dimension(:),allocatable :: my_neighbours,nibool_neighbours
+  integer, dimension(:,:),allocatable :: ibool_neighbours
+  integer :: max_nibool
+  real(kind=CUSTOM_REAL),dimension(:),allocatable :: test_flag
+  integer,dimension(:),allocatable :: dummy_i
+  integer :: i,j,k,ispec,iglob
+  ! debug
+  character(len=150) :: filename
+  logical,parameter :: DEBUG_INTERFACES = .false.
+
+  ! estimates a maximum size of needed arrays
+  MAX_NEIGHBOURS = 8 + NCORNERSCHUNKS  
+  allocate(my_neighbours(MAX_NEIGHBOURS), &
+          nibool_neighbours(MAX_NEIGHBOURS),stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating my_neighbours array')
+  
+  ! estimates initial maximum ibool array
+  max_nibool = npoin2D_max_all_CM_IC * NUMFACES_SHARED &
+               + non_zero_nb_msgs_theor_in_cube*npoin2D_cube_from_slices
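+  ! (generous upper bound: all shared-face points of this slice plus all possible central-cube
+  !  message points; the interface arrays stored further below are allocated with the actual
+  !  sizes returned by get_MPI_interfaces)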
+
+  allocate(ibool_neighbours(max_nibool,MAX_NEIGHBOURS), stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating ibool_neighbours')
+
+
+! sets up MPI interfaces
+! crust mantle region
+  if( myrank == 0 ) write(IMAIN,*) 'crust mantle mpi:'
+  allocate(test_flag(NGLOB_CRUST_MANTLE), &
+          stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag')
+
+  ! sets flag to rank id (+1 to avoid problems with zero rank)
+  test_flag(:) = myrank + 1.0
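+  ! note: the assembly below sums the (rank+1) contributions from all slices sharing a global point;
+  !       once this rank's own contribution is subtracted again further down, a non-zero test_flag
+  !       value therefore marks a point shared with at least one neighbouring slice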
+
+  ! assembles values
+  call assemble_MPI_scalar_block(myrank,test_flag, &
+            NGLOB_CRUST_MANTLE, &
+            iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+            npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+            iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+            iprocfrom_faces,iprocto_faces,imsg_type, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+            NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
+            NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_XY,NCHUNKS)
+
+  ! removes own myrank id (+1)
+  test_flag(:) = test_flag(:) - ( myrank + 1.0)
+
+  ! debug: saves array
+  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_crust_mantle_proc',myrank
+  !call write_VTK_glob_points(NGLOB_CRUST_MANTLE, &
+  !                      xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+  !                      test_flag,filename)
+
+  allocate(dummy_i(NSPEC_CRUST_MANTLE),stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating dummy_i')
+
+  ! determines neighbor rank for shared faces
+  call get_MPI_interfaces(myrank,NGLOB_CRUST_MANTLE,NSPEC_CRUST_MANTLE, &
+                            test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+                            num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+                            max_nibool,MAX_NEIGHBOURS, &
+                            ibool_crust_mantle,&
+                            is_on_a_slice_edge_crust_mantle, &
+                            IREGION_CRUST_MANTLE,.false.,dummy_i,INCLUDE_CENTRAL_CUBE, &
+                            xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle,NPROCTOT)
+
+  deallocate(test_flag)
+  deallocate(dummy_i)
+
+  ! stores MPI interface information
+  allocate(my_neighbours_crust_mantle(num_interfaces_crust_mantle), &
+          nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
+          stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_crust_mantle etc.')
+  my_neighbours_crust_mantle = -1
+  nibool_interfaces_crust_mantle = 0
+
+  ! copies interface arrays
+  if( num_interfaces_crust_mantle > 0 ) then
+    allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
+           stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_crust_mantle')
+    ibool_interfaces_crust_mantle = 0
+
+    ! ranks of neighbour processes
+    my_neighbours_crust_mantle(:) = my_neighbours(1:num_interfaces_crust_mantle)
+    ! number of global ibool entries on each interface
+    nibool_interfaces_crust_mantle(:) = nibool_neighbours(1:num_interfaces_crust_mantle)
+    ! global iglob point ids on each interface
+    ibool_interfaces_crust_mantle(:,:) = ibool_neighbours(1:max_nibool_interfaces_crust_mantle,1:num_interfaces_crust_mantle)
+  else
+    ! dummy allocation (Fortran 90 allows allocate statements with zero array size)
+    max_nibool_interfaces_crust_mantle = 0
+    allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)
+  endif
+
+  ! debug: outputs MPI interface
+  if( DEBUG_INTERFACES ) then
+  do i=1,num_interfaces_crust_mantle
+    write(filename,'(a,i6.6,a,i2.2)') trim(OUTPUT_FILES)//'/MPI_points_crust_mantle_proc',myrank, &
+                    '_',my_neighbours_crust_mantle(i)
+    call write_VTK_data_points(NGLOB_CRUST_MANTLE, &
+                      xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+                      ibool_interfaces_crust_mantle(1:nibool_interfaces_crust_mantle(i),i), &
+                      nibool_interfaces_crust_mantle(i),filename)
+  enddo
+  call sync_all()
+  endif
+
+  ! checks addressing
+  call test_MPI_neighbours(IREGION_CRUST_MANTLE, &
+                              num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+                              my_neighbours_crust_mantle,nibool_interfaces_crust_mantle, &
+                              ibool_interfaces_crust_mantle)
+
+  ! allocates MPI buffers
+  ! crust mantle
+  allocate(buffer_send_vector_crust_mantle(NDIM,max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
+          buffer_recv_vector_crust_mantle(NDIM,max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
+          request_send_vector_crust_mantle(num_interfaces_crust_mantle), &
+          request_recv_vector_crust_mantle(num_interfaces_crust_mantle), &
+          stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_crust_mantle etc.')
+
+  ! checks with assembly of test fields
+  call test_MPI_cm()
+
+
+! outer core region
+  if( myrank == 0 ) write(IMAIN,*) 'outer core mpi:'
+
+  allocate(test_flag(NGLOB_OUTER_CORE), &
+          stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag outer core')
+
+  ! sets flag to rank id (+1 to avoid problems with zero rank)
+  test_flag(:) = myrank + 1.0
+
+  ! assembles values
+  call assemble_MPI_scalar_block(myrank,test_flag, &
+            NGLOB_OUTER_CORE, &
+            iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
+            npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
+            iboolfaces_outer_core,iboolcorner_outer_core, &
+            iprocfrom_faces,iprocto_faces,imsg_type, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+            NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
+            NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE),NGLOB2DMAX_XY,NCHUNKS)
+
+
+  ! removes own myrank id (+1)
+  test_flag(:) = test_flag(:) - ( myrank + 1.0)
+
+  ! debug: saves array
+  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_outer_core_proc',myrank
+  !call write_VTK_glob_points(NGLOB_OUTER_CORE, &
+  !                      xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+  !                      test_flag,filename)
+
+  allocate(dummy_i(NSPEC_OUTER_CORE),stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating dummy_i')
+
+  ! determines neighbor rank for shared faces
+  call get_MPI_interfaces(myrank,NGLOB_OUTER_CORE,NSPEC_OUTER_CORE, &
+                            test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+                            num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+                            max_nibool,MAX_NEIGHBOURS, &
+                            ibool_outer_core,&
+                            is_on_a_slice_edge_outer_core, &
+                            IREGION_OUTER_CORE,.false.,dummy_i,INCLUDE_CENTRAL_CUBE, &
+                            xstore_outer_core,ystore_outer_core,zstore_outer_core,NPROCTOT)
+
+  deallocate(test_flag)
+  deallocate(dummy_i)
+
+  ! stores MPI interface information
+  allocate(my_neighbours_outer_core(num_interfaces_outer_core), &
+          nibool_interfaces_outer_core(num_interfaces_outer_core), &
+          stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_outer_core etc.')
+  my_neighbours_outer_core = -1
+  nibool_interfaces_outer_core = 0
+
+  ! copies interface arrays
+  if( num_interfaces_outer_core > 0 ) then
+    allocate(ibool_interfaces_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
+           stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_outer_core')
+    ibool_interfaces_outer_core = 0
+
+    ! ranks of neighbour processes
+    my_neighbours_outer_core(:) = my_neighbours(1:num_interfaces_outer_core)
+    ! number of global ibool entries on each interface
+    nibool_interfaces_outer_core(:) = nibool_neighbours(1:num_interfaces_outer_core)
+    ! global iglob point ids on each interface
+    ibool_interfaces_outer_core(:,:) = ibool_neighbours(1:max_nibool_interfaces_outer_core,1:num_interfaces_outer_core)
+  else
+    ! dummy allocation (Fortran 90 allows allocate statements with zero array size)
+    max_nibool_interfaces_outer_core = 0
+    allocate(ibool_interfaces_outer_core(0,0),stat=ier)
+  endif
+
+  ! debug: outputs MPI interface
+  if( DEBUG_INTERFACES ) then
+  do i=1,num_interfaces_outer_core
+    write(filename,'(a,i6.6,a,i2.2)') trim(OUTPUT_FILES)//'/MPI_points_outer_core_proc',myrank, &
+                    '_',my_neighbours_outer_core(i)
+    call write_VTK_data_points(NGLOB_OUTER_CORE, &
+                      xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+                      ibool_interfaces_outer_core(1:nibool_interfaces_outer_core(i),i), &
+                      nibool_interfaces_outer_core(i),filename)
+  enddo
+  call sync_all()
+  endif
+  
+  ! checks addressing
+  call test_MPI_neighbours(IREGION_OUTER_CORE, &
+                              num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+                              my_neighbours_outer_core,nibool_interfaces_outer_core, &
+                              ibool_interfaces_outer_core)
+
+  ! allocates MPI buffers
+  ! outer core
+  allocate(buffer_send_scalar_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
+          buffer_recv_scalar_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
+          request_send_scalar_outer_core(num_interfaces_outer_core), &
+          request_recv_scalar_outer_core(num_interfaces_outer_core), &
+          stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_scalar_outer_core etc.')
+
+  ! checks with assembly of test fields
+  call test_MPI_oc()
+
+
+! inner core
+  if( myrank == 0 ) write(IMAIN,*) 'inner core mpi:'
+
+  allocate(test_flag(NGLOB_INNER_CORE), &
+          stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag inner core')
+
+  ! initializes flags to zero, then sets the flag to the rank id (+1 to avoid problems with zero rank)
+  ! only on points belonging to at least one non-fictitious element
+  test_flag(:) = 0.0
+  do ispec=1,NSPEC_INNER_CORE
+    ! suppress fictitious elements in central cube
+    if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE) cycle
+    ! sets flags
+    do k = 1,NGLLZ
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          iglob = ibool_inner_core(i,j,k,ispec)
+          test_flag(iglob) = myrank + 1.0
+        enddo
+      enddo
+    enddo
+  enddo
+
+  ! assembles values
+  call assemble_MPI_scalar_block(myrank,test_flag, &
+            NGLOB_INNER_CORE, &
+            iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+            npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+            iboolfaces_inner_core,iboolcorner_inner_core, &
+            iprocfrom_faces,iprocto_faces,imsg_type, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+            NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL(IREGION_INNER_CORE), &
+            NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE),NGLOB2DMAX_XY,NCHUNKS)
+
+  ! debug: saves array
+  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_inner_core_A_proc',myrank
+  !call write_VTK_glob_points(NGLOB_INNER_CORE, &
+  !                      xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+  !                      test_flag,filename)
+  
+  ! debug: idoubling inner core
+  if( DEBUG_INTERFACES ) then
+    write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_idoubling_inner_core_proc',myrank
+    call write_VTK_data_elem_i(NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+                            xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+                            ibool_inner_core, &
+                            idoubling_inner_core,filename)
+    call sync_all()
+  endif
+  
+  ! including central cube
+  if(INCLUDE_CENTRAL_CUBE) then
+    ! user output
+    if( myrank == 0 ) write(IMAIN,*) 'inner core with central cube mpi:'
+
+    ! test_flag is a scalar, not a vector
+    ndim_assemble = 1
+
+    ! uses the central cube buffers to assemble the scalar test flag across the central cube
+    call assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
+                 npoin2D_cube_from_slices, buffer_all_cube_from_slices, &
+                 buffer_slices, buffer_slices2, ibool_central_cube, &
+                 receiver_cube_from_slices, ibool_inner_core, &
+                 idoubling_inner_core, NSPEC_INNER_CORE, &
+                 ibelm_bottom_inner_core, NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
+                 NGLOB_INNER_CORE, &
+                 test_flag,ndim_assemble, &
+                 iproc_eta,addressing,NCHUNKS,NPROC_XI,NPROC_ETA)
+  endif
+
+  ! removes own myrank id (+1)
+  test_flag = test_flag - ( myrank + 1.0)
+  where( test_flag < 0.0 ) test_flag = 0.0
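+  ! points that belong only to fictitious central-cube elements were never flagged above, so the
+  ! subtraction drives them negative; resetting them to zero keeps them out of the interface search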
+
+  ! debug: saves array
+  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_inner_core_B_proc',myrank
+  !call write_VTK_glob_points(NGLOB_INNER_CORE, &
+  !                    xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+  !                    test_flag,filename)
+  !call sync_all()
+
+  ! loops over processes in sequential order, for testing purposes
+  do i=0,NPROCTOT - 1
+    if( myrank == i ) then
+      ! gets new interfaces for inner_core without central cube yet
+      ! determines neighbor rank for shared faces
+      call get_MPI_interfaces(myrank,NGLOB_INNER_CORE,NSPEC_INNER_CORE, &
+                            test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+                            num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+                            max_nibool,MAX_NEIGHBOURS, &
+                            ibool_inner_core,&
+                            is_on_a_slice_edge_inner_core, &
+                            IREGION_INNER_CORE,.false.,idoubling_inner_core,INCLUDE_CENTRAL_CUBE, &
+                            xstore_inner_core,ystore_inner_core,zstore_inner_core,NPROCTOT)
+
+    endif
+    call sync_all()
+  enddo
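+  ! (the rank-by-rank serialization above matches the 'for testing purposes' remark;
+  !  presumably it only keeps per-rank debug output from get_MPI_interfaces readable)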
+
+
+  deallocate(test_flag)
+  call sync_all()
+
+  ! stores MPI interface information
+  allocate(my_neighbours_inner_core(num_interfaces_inner_core), &
+          nibool_interfaces_inner_core(num_interfaces_inner_core), &
+          stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_inner_core etc.')
+  my_neighbours_inner_core = -1
+  nibool_interfaces_inner_core = 0
+
+  ! copies interface arrays
+  if( num_interfaces_inner_core > 0 ) then
+    allocate(ibool_interfaces_inner_core(max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
+           stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_inner_core')
+    ibool_interfaces_inner_core = 0
+
+    ! ranks of neighbour processes
+    my_neighbours_inner_core(:) = my_neighbours(1:num_interfaces_inner_core)
+    ! number of global ibool entries on each interface
+    nibool_interfaces_inner_core(:) = nibool_neighbours(1:num_interfaces_inner_core)
+    ! global iglob point ids on each interface
+    ibool_interfaces_inner_core(:,:) = ibool_neighbours(1:max_nibool_interfaces_inner_core,1:num_interfaces_inner_core)
+  else
+    ! dummy allocation (Fortran 90 allows allocate statements with zero array size)
+    max_nibool_interfaces_inner_core = 0
+    allocate(ibool_interfaces_inner_core(0,0),stat=ier)
+  endif
+
+  ! debug: saves MPI interfaces
+  if( DEBUG_INTERFACES ) then
+  do i=1,num_interfaces_inner_core
+    write(filename,'(a,i6.6,a,i2.2)') trim(OUTPUT_FILES)//'/MPI_points_inner_core_proc',myrank, &
+                    '_',my_neighbours_inner_core(i)
+    call write_VTK_data_points(NGLOB_INNER_CORE, &
+                      xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+                      ibool_interfaces_inner_core(1:nibool_interfaces_inner_core(i),i), &
+                      nibool_interfaces_inner_core(i),filename)
+  enddo
+  call sync_all()
+  endif
+  
+  ! checks addressing
+  call test_MPI_neighbours(IREGION_INNER_CORE, &
+                              num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+                              my_neighbours_inner_core,nibool_interfaces_inner_core, &
+                              ibool_interfaces_inner_core)
+
+  ! allocates MPI buffers
+  ! inner core
+  allocate(buffer_send_vector_inner_core(NDIM,max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
+          buffer_recv_vector_inner_core(NDIM,max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
+          request_send_vector_inner_core(num_interfaces_inner_core), &
+          request_recv_vector_inner_core(num_interfaces_inner_core), &
+          stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array buffer_send_vector_inner_core etc.')
+
+  ! checks with assembly of test fields
+  call test_MPI_ic()
+
+  ! synchronizes MPI processes
+  call sync_all()
+
+  ! frees temporary array
+  deallocate(ibool_neighbours)
+  deallocate(my_neighbours,nibool_neighbours)
+
+  end subroutine cmi_setup_MPIinterfaces
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine cmi_setup_InnerOuter()
+  
+  use meshfem3D_par
+  use create_MPI_interfaces_par
+  implicit none
+
+  ! local parameters
+  real :: percentage_edge
+  integer :: ier,ispec,iinner,iouter
+  ! debug  
+  character(len=150) :: filename
+  logical,parameter :: DEBUG_INTERFACES = .false.
+  
+  ! stores inner / outer elements
+  !
+  ! note: arrays is_on_a_slice_edge_.. have flags set for elements which need to
+  !         communicate with other MPI processes
+
+  ! crust_mantle
+  nspec_outer_crust_mantle = count( is_on_a_slice_edge_crust_mantle )
+  nspec_inner_crust_mantle = NSPEC_CRUST_MANTLE - nspec_outer_crust_mantle
+
+  num_phase_ispec_crust_mantle = max(nspec_inner_crust_mantle,nspec_outer_crust_mantle)
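+  ! (sized to the larger of the two phases so that both element lists fit in the same 2-D array;
+  !  unused entries remain 0)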
+
+  allocate(phase_ispec_inner_crust_mantle(num_phase_ispec_crust_mantle,2),stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_crust_mantle')
+
+  phase_ispec_inner_crust_mantle(:,:) = 0
+  iinner = 0
+  iouter = 0
+  do ispec=1,NSPEC_CRUST_MANTLE
+    if( is_on_a_slice_edge_crust_mantle(ispec) ) then
+      ! outer element
+      iouter = iouter + 1
+      phase_ispec_inner_crust_mantle(iouter,1) = ispec
+    else
+      ! inner element
+      iinner = iinner + 1
+      phase_ispec_inner_crust_mantle(iinner,2) = ispec
+    endif
+  enddo
+
+  ! outer_core
+  nspec_outer_outer_core = count( is_on_a_slice_edge_outer_core )
+  nspec_inner_outer_core = NSPEC_OUTER_CORE - nspec_outer_outer_core
+
+  num_phase_ispec_outer_core = max(nspec_inner_outer_core,nspec_outer_outer_core)
+
+  allocate(phase_ispec_inner_outer_core(num_phase_ispec_outer_core,2),stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_outer_core')
+
+  phase_ispec_inner_outer_core(:,:) = 0
+  iinner = 0
+  iouter = 0
+  do ispec=1,NSPEC_OUTER_CORE
+    if( is_on_a_slice_edge_outer_core(ispec) ) then
+      ! outer element
+      iouter = iouter + 1
+      phase_ispec_inner_outer_core(iouter,1) = ispec
+    else
+      ! inner element
+      iinner = iinner + 1
+      phase_ispec_inner_outer_core(iinner,2) = ispec
+    endif
+  enddo
+
+  ! inner_core
+  nspec_outer_inner_core = count( is_on_a_slice_edge_inner_core )
+  nspec_inner_inner_core = NSPEC_INNER_CORE - nspec_outer_inner_core
+
+  num_phase_ispec_inner_core = max(nspec_inner_inner_core,nspec_outer_inner_core)
+
+  allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2),stat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_inner_core')
+
+  phase_ispec_inner_inner_core(:,:) = 0
+  iinner = 0
+  iouter = 0
+  do ispec=1,NSPEC_INNER_CORE
+    if( is_on_a_slice_edge_inner_core(ispec) ) then
+      ! outer element
+      iouter = iouter + 1
+      phase_ispec_inner_inner_core(iouter,1) = ispec
+    else
+      ! inner element
+      iinner = iinner + 1
+      phase_ispec_inner_inner_core(iinner,2) = ispec
+    endif
+  enddo
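+  ! minimal usage sketch (assumed solver-side pattern, not part of this routine): communication can
+  ! be overlapped with computation by looping over the two phases, e.g. for the crust/mantle:
+  !   do iphase = 1,2                      ! 1 = outer (slice-edge) elements, 2 = inner elements
+  !     if( iphase == 1 ) then
+  !       nspec_phase = nspec_outer_crust_mantle
+  !     else
+  !       nspec_phase = nspec_inner_crust_mantle
+  !     endif
+  !     do ispec_p = 1,nspec_phase
+  !       ispec = phase_ispec_inner_crust_mantle(ispec_p,iphase)
+  !       ! ... element computation ...
+  !     enddo
+  !   enddo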
+
+  ! user output
+  if(myrank == 0) then
+
+    write(IMAIN,*)
+    write(IMAIN,*) 'for overlapping of communications with calculations:'
+    write(IMAIN,*)
+
+    percentage_edge = 100. * nspec_outer_crust_mantle / real(NSPEC_CRUST_MANTLE)
+    write(IMAIN,*) 'percentage of edge elements in crust/mantle ',percentage_edge,'%'
+    write(IMAIN,*) 'percentage of volume elements in crust/mantle ',100. - percentage_edge,'%'
+    write(IMAIN,*)
+
+    percentage_edge = 100.* nspec_outer_outer_core / real(NSPEC_OUTER_CORE)
+    write(IMAIN,*) 'percentage of edge elements in outer core ',percentage_edge,'%'
+    write(IMAIN,*) 'percentage of volume elements in outer core ',100. - percentage_edge,'%'
+    write(IMAIN,*)
+
+    percentage_edge = 100. * nspec_outer_inner_core / real(NSPEC_INNER_CORE)
+    write(IMAIN,*) 'percentage of edge elements in inner core ',percentage_edge,'%'
+    write(IMAIN,*) 'percentage of volume elements in inner core ',100. - percentage_edge,'%'
+    write(IMAIN,*)
+
+  endif
+
+  ! debug: saves element flags
+  if( DEBUG_INTERFACES ) then
+    ! crust mantle
+    write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_crust_mantle_proc',myrank
+    call write_VTK_data_elem_l(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
+                              xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
+                              ibool_crust_mantle, &
+                              is_on_a_slice_edge_crust_mantle,filename)
+    ! outer core
+    write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_outer_core_proc',myrank
+    call write_VTK_data_elem_l(NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
+                              xstore_outer_core,ystore_outer_core,zstore_outer_core, &
+                              ibool_outer_core, &
+                              is_on_a_slice_edge_outer_core,filename)
+    ! inner core
+    write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_inner_core_proc',myrank
+    call write_VTK_data_elem_l(NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+                              xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+                              ibool_inner_core, &
+                              is_on_a_slice_edge_inner_core,filename)
+  endif
+  
+  end subroutine cmi_setup_InnerOuter
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine cmi_setup_color_perm()
+  
+  use meshfem3D_par
+  use create_MPI_interfaces_par
+  implicit none
+
+  ! local parameters
+  integer, dimension(:), allocatable :: perm
+  integer :: ier
+
+  ! user output
+  if(myrank == 0) then
+    write(IMAIN,*) 'mesh coloring: ',USE_MESH_COLORING_GPU
+  endif
+
+  ! crust mantle
+  ! initializes
+  num_colors_outer_crust_mantle = 0
+  num_colors_inner_crust_mantle = 0
+
+  ! mesh coloring
+  if( USE_MESH_COLORING_GPU ) then
+
+    ! user output
+    if(myrank == 0) write(IMAIN,*) '  coloring crust mantle... '
+
+    ! creates coloring of elements
+    allocate(perm(NSPEC_CRUST_MANTLE),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating temporary perm crust mantle array')
+    perm(:) = 0
+
+    allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle+num_colors_inner_crust_mantle),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating num_elem_colors_crust_mantle array')
+
+    deallocate(perm)
+  else
+    ! dummy array
+    allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle+num_colors_inner_crust_mantle),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating num_elem_colors_crust_mantle array')
+  endif
+
+  ! outer core
+  ! initializes
+  num_colors_outer_outer_core = 0
+  num_colors_inner_outer_core = 0
+
+  ! mesh coloring
+  if( USE_MESH_COLORING_GPU ) then
+
+    ! user output
+    if(myrank == 0) write(IMAIN,*) '  coloring outer core... '
+
+    ! creates coloring of elements
+    allocate(perm(NSPEC_OUTER_CORE),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating temporary perm outer core array')
+    perm(:) = 0
+
+    allocate(num_elem_colors_outer_core(num_colors_outer_outer_core+num_colors_inner_outer_core),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating num_elem_colors_outer_core array')
+
+    deallocate(perm)
+  else
+    ! dummy array 
+    allocate(num_elem_colors_outer_core(num_colors_outer_outer_core+num_colors_inner_outer_core),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating num_elem_colors_outer_core array')
+  endif
+  
+  ! inner core
+  ! initializes
+  num_colors_outer_inner_core = 0
+  num_colors_inner_inner_core = 0
+
+  ! mesh coloring
+  if( USE_MESH_COLORING_GPU ) then
+
+    ! user output
+    if(myrank == 0) write(IMAIN,*) '  coloring inner core... '
+
+    ! creates coloring of elements
+    allocate(perm(NSPEC_INNER_CORE),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating temporary perm inner core array')
+    perm(:) = 0
+
+    allocate(num_elem_colors_inner_core(num_colors_outer_inner_core+num_colors_inner_inner_core),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating num_elem_colors_inner_core array')
+
+    deallocate(perm)
+  else
+    ! dummy array
+    allocate(num_elem_colors_inner_core(num_colors_outer_inner_core+num_colors_inner_inner_core),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating num_elem_colors_inner_core array')
+  endif
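+  ! note: as committed here, no actual coloring is computed in this routine; with num_colors_*
+  !       left at zero, the num_elem_colors_* arrays are allocated with size zero and act as placeholders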
+  
+  end subroutine cmi_setup_color_perm
+  
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine cmi_save_interfaces()
+  
+  use meshfem3D_par
+  use create_MPI_interfaces_par
+  implicit none
+
+  ! crust mantle
+  call cmi_save_solver_data(myrank,IREGION_CRUST_MANTLE,LOCAL_PATH, &
+                           num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+                           my_neighbours_crust_mantle,nibool_interfaces_crust_mantle, &
+                           ibool_interfaces_crust_mantle, &
+                           nspec_inner_crust_mantle,nspec_outer_crust_mantle, &
+                           num_phase_ispec_crust_mantle,phase_ispec_inner_crust_mantle, &
+                           num_colors_outer_crust_mantle,num_colors_inner_crust_mantle, &
+                           num_elem_colors_crust_mantle)
+
+
+  ! outer core
+  call cmi_save_solver_data(myrank,IREGION_OUTER_CORE,LOCAL_PATH, &
+                           num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+                           my_neighbours_outer_core,nibool_interfaces_outer_core, &
+                           ibool_interfaces_outer_core, &
+                           nspec_inner_outer_core,nspec_outer_outer_core, &
+                           num_phase_ispec_outer_core,phase_ispec_inner_outer_core, &
+                           num_colors_outer_outer_core,num_colors_inner_outer_core, &
+                           num_elem_colors_outer_core)
+
+
+  ! inner core
+  call cmi_save_solver_data(myrank,IREGION_INNER_CORE,LOCAL_PATH, &
+                           num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+                           my_neighbours_inner_core,nibool_interfaces_inner_core, &
+                           ibool_interfaces_inner_core, &
+                           nspec_inner_inner_core,nspec_outer_inner_core, &
+                           num_phase_ispec_inner_core,phase_ispec_inner_inner_core, &
+                           num_colors_outer_inner_core,num_colors_inner_inner_core, &
+                           num_elem_colors_inner_core)
+
+
+  end subroutine cmi_save_interfaces
+
+  
+  
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine cmi_free_arrays()
+
+  use meshfem3D_par
+  use create_MPI_interfaces_par
+  implicit none
+
+  ! synchronize processes
+  call sync_all()
+
+  deallocate(iprocfrom_faces,iprocto_faces,imsg_type)
+  deallocate(iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners)
+  deallocate(buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar)
+  deallocate(buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector)
+
+  ! crust mantle
+  deallocate(iboolcorner_crust_mantle)
+  deallocate(iboolleft_xi_crust_mantle, &
+          iboolright_xi_crust_mantle)          
+  deallocate(iboolleft_eta_crust_mantle, &
+          iboolright_eta_crust_mantle)
+  deallocate(iboolfaces_crust_mantle)
+
+  deallocate(phase_ispec_inner_crust_mantle)
+  deallocate(num_elem_colors_crust_mantle)
+  
+  ! outer core
+  deallocate(iboolcorner_outer_core)
+  deallocate(iboolleft_xi_outer_core, &
+          iboolright_xi_outer_core)
+  deallocate(iboolleft_eta_outer_core, &
+          iboolright_eta_outer_core)
+  deallocate(iboolfaces_outer_core)
+
+  deallocate(phase_ispec_inner_outer_core)
+  deallocate(num_elem_colors_outer_core)
+
+  ! inner core
+  deallocate(ibelm_xmin_inner_core, &
+          ibelm_xmax_inner_core)
+  deallocate(ibelm_ymin_inner_core, &
+          ibelm_ymax_inner_core)
+  deallocate(ibelm_bottom_inner_core)
+  deallocate(ibelm_top_inner_core)
+
+  deallocate(iboolcorner_inner_core)
+  deallocate(iboolleft_xi_inner_core, &
+          iboolright_xi_inner_core)
+  deallocate(iboolleft_eta_inner_core, &
+          iboolright_eta_inner_core)
+  deallocate(iboolfaces_inner_core)
+
+  deallocate(xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle)
+  deallocate(idoubling_crust_mantle,ibool_crust_mantle)
+
+  deallocate(xstore_outer_core,ystore_outer_core,zstore_outer_core)
+  deallocate(idoubling_outer_core,ibool_outer_core)
+
+  deallocate(xstore_inner_core,ystore_inner_core,zstore_inner_core)
+  deallocate(idoubling_inner_core,ibool_inner_core)
+
+  deallocate(phase_ispec_inner_inner_core)
+  deallocate(num_elem_colors_inner_core)
+
+  deallocate(mask_ibool)
+  
+  ! frees temporary allocated arrays
+  deallocate(is_on_a_slice_edge_crust_mantle, &
+            is_on_a_slice_edge_outer_core, &
+            is_on_a_slice_edge_inner_core)
+
+  end subroutine cmi_free_arrays
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine cmi_read_solver_data(myrank,iregion_code, &
+                                  nspec,nglob, &
+                                  xstore,ystore,zstore, &
+                                  ibool,idoubling,is_on_a_slice_edge, &
+                                  LOCAL_PATH)
+  implicit none
+
+  include "constants.h"
+
+  integer :: iregion_code,myrank
+
+  integer :: nspec,nglob
+  
+  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore,ystore,zstore
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  integer, dimension(nspec) :: idoubling
+  logical, dimension(nspec) :: is_on_a_slice_edge
+
+  character(len=150) :: LOCAL_PATH
+  
+  ! local parameters
+  character(len=150) prname
+  integer :: ier
+  
+  ! create the name for the database of the current slice and region
+  call create_name_database(prname,myrank,iregion_code,LOCAL_PATH)
+  
+  open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_2.bin', &
+       status='old',action='read',form='unformatted',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_2.bin')
+  
+  read(IIN) xstore
+  read(IIN) ystore
+  read(IIN) zstore
+  read(IIN) ibool
+  read(IIN) idoubling
+  read(IIN) is_on_a_slice_edge
+
+  close(IIN)
+  
+  end subroutine cmi_read_solver_data
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine cmi_save_solver_data(myrank,iregion_code,LOCAL_PATH, &
+                                  num_interfaces,max_nibool_interfaces, &
+                                  my_neighbours,nibool_interfaces, &
+                                  ibool_interfaces, &
+                                  nspec_inner,nspec_outer, &
+                                  num_phase_ispec,phase_ispec_inner, &
+                                  num_colors_outer,num_colors_inner, &
+                                  num_elem_colors)
+  implicit none
+
+  include "constants.h"
+
+  integer :: iregion_code,myrank
+
+  character(len=150) :: LOCAL_PATH
+
+  ! MPI interfaces
+  integer :: num_interfaces,max_nibool_interfaces
+  integer, dimension(num_interfaces) :: my_neighbours
+  integer, dimension(num_interfaces) :: nibool_interfaces
+  integer, dimension(max_nibool_interfaces,num_interfaces) :: &
+    ibool_interfaces
+
+  ! inner/outer elements
+  integer :: nspec_inner,nspec_outer
+  integer :: num_phase_ispec
+  integer,dimension(num_phase_ispec,2) :: phase_ispec_inner
+
+  ! mesh coloring
+  integer :: num_colors_outer,num_colors_inner
+  integer, dimension(num_colors_outer + num_colors_inner) :: &
+    num_elem_colors
+  
+  ! local parameters
+  character(len=150) prname
+  integer :: ier
+  
+  ! create the name for the database of the current slice and region
+  call create_name_database(prname,myrank,iregion_code,LOCAL_PATH)
+  
+  open(unit=IOUT,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
+       status='unknown',action='write',form='unformatted',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_mpi.bin')
+  
+  ! MPI interfaces
+  write(IOUT) num_interfaces
+  if( num_interfaces > 0 ) then
+    write(IOUT) max_nibool_interfaces
+    write(IOUT) my_neighbours
+    write(IOUT) nibool_interfaces
+    write(IOUT) ibool_interfaces
+  endif
+
+  ! inner/outer elements
+  write(IOUT) nspec_inner,nspec_outer
+  write(IOUT) num_phase_ispec
+  if(num_phase_ispec > 0 ) write(IOUT) phase_ispec_inner
+
+  ! mesh coloring
+  if( USE_MESH_COLORING_GPU ) then
+    write(IOUT) num_colors_outer,num_colors_inner
+    write(IOUT) num_elem_colors
+  endif
+
+  close(IOUT)
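+  ! note: readers of solver_data_mpi.bin are expected to follow the same record order:
+  !       num_interfaces, then (max_nibool_interfaces, my_neighbours, nibool_interfaces, ibool_interfaces)
+  !       only if num_interfaces > 0, then nspec_inner/nspec_outer, num_phase_ispec, phase_ispec_inner
+  !       only if num_phase_ispec > 0, and the coloring records only when USE_MESH_COLORING_GPU is set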
+
+  end subroutine cmi_save_solver_data
+  
+  

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_central_cube_buffers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_central_cube_buffers.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_central_cube_buffers.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,636 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!
+!--- create buffers to assemble with central cube
+!
+
+  subroutine create_central_cube_buffers(myrank,iproc_xi,iproc_eta,ichunk, &
+                   NPROC_XI,NPROC_ETA,NCHUNKS, &
+                   NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+                   NSPEC2DMAX_XMIN_XMAX_INNER_CORE,NSPEC2DMAX_YMIN_YMAX_INNER_CORE, &
+                   NSPEC2D_BOTTOM_INNER_CORE, &
+                   addressing,ibool_inner_core,idoubling_inner_core, &
+                   xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+                   nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
+                   nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
+                   ibelm_xmin_inner_core,ibelm_xmax_inner_core, &
+                   ibelm_ymin_inner_core,ibelm_ymax_inner_core,ibelm_bottom_inner_core, &
+                   nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices, &
+                   receiver_cube_from_slices,sender_from_slices_to_cube,ibool_central_cube, &
+                   buffer_slices,buffer_slices2,buffer_all_cube_from_slices)
+
+  implicit none
+
+  ! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+
+  integer, intent(in) :: myrank,iproc_xi,iproc_eta,ichunk, &
+       NPROC_XI,NPROC_ETA,NCHUNKS,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+       NSPEC2DMAX_XMIN_XMAX_INNER_CORE,NSPEC2DMAX_YMIN_YMAX_INNER_CORE,NSPEC2D_BOTTOM_INNER_CORE
+
+  ! for addressing of the slices
+  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1), intent(in) :: addressing
+
+  ! mesh parameters
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE), intent(in) :: ibool_inner_core
+
+  ! local to global mapping
+  integer, dimension(NSPEC_INNER_CORE), intent(in) :: idoubling_inner_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE), intent(in) :: xstore_inner_core,ystore_inner_core,zstore_inner_core
+
+  ! boundary parameters locator
+  integer, intent(in) :: nspec2D_xmin_inner_core,nspec2D_xmax_inner_core,nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
+  integer, dimension(NSPEC2DMAX_XMIN_XMAX_INNER_CORE), intent(in) :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
+  integer, dimension(NSPEC2DMAX_YMIN_YMAX_INNER_CORE), intent(in) :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
+  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE), intent(in) :: ibelm_bottom_inner_core
+
+  integer, intent(in) :: nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices
+
+  ! for matching with central cube in inner core
+  integer, intent(out) :: receiver_cube_from_slices
+
+  integer, dimension(non_zero_nb_msgs_theor_in_cube), intent(out) :: sender_from_slices_to_cube
+  integer, dimension(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices), intent(out) :: ibool_central_cube
+  double precision, dimension(npoin2D_cube_from_slices,NDIM), intent(out) :: buffer_slices,buffer_slices2
+  double precision, dimension(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM), intent(out) :: &
+        buffer_all_cube_from_slices
+
+  ! local variables below
+  integer i,j,k,ispec,ispec2D,iglob,ier
+  integer sender,receiver,imsg,ipoin,iproc_xi_loop
+
+  double precision x_target,y_target,z_target
+  double precision x_current,y_current,z_current
+
+  ! MPI status of messages to be received
+  integer msg_status(MPI_STATUS_SIZE)
+
+  integer :: nproc_xi_half_floor,nproc_xi_half_ceil
+
+  if( mod(NPROC_XI,2) /= 0 ) then
+    nproc_xi_half_floor = floor(NPROC_XI/2.d0)
+    nproc_xi_half_ceil = ceiling(NPROC_XI/2.d0)
+  else
+    nproc_xi_half_floor = NPROC_XI/2
+    nproc_xi_half_ceil = NPROC_XI/2
+  endif
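+  ! e.g., NPROC_XI = 5 gives nproc_xi_half_floor = 2 and nproc_xi_half_ceil = 3,
+  ! while any even NPROC_XI gives both equal to NPROC_XI/2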
+
+  ! check that the number of points in this slice is correct
+  if(minval(ibool_inner_core(:,:,:,:)) /= 1 .or. maxval(ibool_inner_core(:,:,:,:)) /= NGLOB_INNER_CORE) &
+    call exit_MPI(myrank,'incorrect global numbering: iboolmax does not equal nglob in inner core')
+  
+
+!--- processor to send information to in cube from slices
+
+! four vertical sides first
+  if(ichunk == CHUNK_AC) then
+    if (iproc_xi < nproc_xi_half_floor) then
+      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1,iproc_eta)
+    else
+      receiver_cube_from_slices = addressing(CHUNK_AB,0,iproc_eta)
+    endif
+  else if(ichunk == CHUNK_BC) then
+    if (iproc_xi < nproc_xi_half_floor) then
+      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1-iproc_eta,NPROC_ETA-1)
+    else
+      receiver_cube_from_slices = addressing(CHUNK_AB,iproc_eta,NPROC_ETA-1)
+    endif
+  else if(ichunk == CHUNK_AC_ANTIPODE) then
+    if (iproc_xi <= ceiling((NPROC_XI/2.d0)-1)) then
+      receiver_cube_from_slices = addressing(CHUNK_AB,NPROC_XI-1,iproc_eta)
+    else
+      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,0,iproc_eta)
+    endif
+  else if(ichunk == CHUNK_BC_ANTIPODE) then
+    if (iproc_xi < nproc_xi_half_floor) then
+      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,iproc_eta,0)
+    else
+      receiver_cube_from_slices = addressing(CHUNK_AB,NPROC_XI-1-iproc_eta,0)
+    endif
+! bottom of cube, direct correspondence but with inverted xi axis
+  else if(ichunk == CHUNK_AB_ANTIPODE) then
+    receiver_cube_from_slices = addressing(CHUNK_AB,NPROC_XI-1-iproc_xi,iproc_eta)
+  else if(ichunk == CHUNK_AB) then
+    receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1-iproc_xi,iproc_eta)
+  endif
+
+
+!--- list of processors to receive information from in cube
+
+! only for slices in central cube
+  if(ichunk == CHUNK_AB) then
+
+! initialize index of sender
+    imsg = 0
+
+! define sender for xi = xi_min edge
+    if(iproc_xi == 0) then
+      do iproc_xi_loop = nproc_xi_half_floor,NPROC_XI-1
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC,iproc_xi_loop,iproc_eta)
+      enddo
+    endif
+
+! define sender for xi = xi_max edge
+    if(iproc_xi == NPROC_XI-1) then
+      do iproc_xi_loop = 0, floor((NPROC_XI-1)/2.d0)
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC_ANTIPODE,iproc_xi_loop,iproc_eta)
+      enddo
+    endif
+
+! define sender for eta = eta_min edge
+    if(iproc_eta == 0) then
+      do iproc_xi_loop = nproc_xi_half_floor,NPROC_XI-1
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC_ANTIPODE,iproc_xi_loop,NPROC_ETA-1-iproc_xi)
+      enddo
+    endif
+
+! define sender for eta = eta_max edge
+    if(iproc_eta == NPROC_ETA-1) then
+      do iproc_xi_loop = nproc_xi_half_floor,NPROC_XI-1
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC,iproc_xi_loop,iproc_xi)
+      enddo
+    endif
+
+! define sender for bottom edge
+! bottom of cube, direct correspondence but with inverted xi axis
+    imsg = imsg + 1
+    sender_from_slices_to_cube(imsg) = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1-iproc_xi,iproc_eta)
+
+! check that total number of faces found is correct
+    if(imsg /= nb_msgs_theor_in_cube) then
+      print*,'error ',myrank,'nb_msgs_theor_in_cube:',nb_msgs_theor_in_cube,imsg
+      call exit_MPI(myrank,'wrong number of faces found for central cube')
+    endif
+
+  else if(ichunk == CHUNK_AB_ANTIPODE) then
+
+! initialize index of sender
+    imsg = 0
+
+! define sender for xi = xi_min edge
+    if(iproc_xi == 0) then
+      do iproc_xi_loop = nproc_xi_half_ceil,NPROC_XI-1
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC_ANTIPODE,iproc_xi_loop,iproc_eta)
+      enddo
+    endif
+
+! define sender for xi = xi_max edge
+    if(iproc_xi == NPROC_XI-1) then
+      do iproc_xi_loop = 0, floor((NPROC_XI/2.d0)-1.d0)
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC,iproc_xi_loop,iproc_eta)
+      enddo
+    endif
+
+! define sender for eta = eta_min edge
+    if(iproc_eta == 0) then
+      do iproc_xi_loop = 0, floor((NPROC_XI/2.d0)-1.d0)
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC_ANTIPODE,iproc_xi_loop,iproc_xi)
+      enddo
+    endif
+
+! define sender for eta = eta_max edge
+    if(iproc_eta == NPROC_ETA-1) then
+      do iproc_xi_loop = 0, floor((NPROC_XI/2.d0)-1.d0)
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC,iproc_xi_loop,NPROC_ETA-1-iproc_xi)
+      enddo
+    endif
+
+    ! daniel: in case NPROC_XI == 1, the other chunks exchange all bottom points with
+    ! CHUNK_AB **and** CHUNK_AB_ANTIPODE
+    if(NPROC_XI==1) then
+      ! define sender for xi = xi_min edge
+      if(iproc_xi == 0) then
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC_ANTIPODE,0,iproc_eta)
+      endif
+
+      ! define sender for xi = xi_max edge
+      if(iproc_xi == NPROC_XI-1) then
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC,0,iproc_eta)
+      endif
+
+      ! define sender for eta = eta_min edge
+      if(iproc_eta == 0) then
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC_ANTIPODE,0,iproc_xi)
+      endif
+
+      ! define sender for eta = eta_max edge
+      if(iproc_eta == NPROC_ETA-1) then
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC,0,NPROC_ETA-1-iproc_xi)
+      endif
+    endif
+
+! define sender for bottom edge
+! bottom of cube, direct correspondence but with inverted xi axis
+    imsg = imsg + 1
+    sender_from_slices_to_cube(imsg) = addressing(CHUNK_AB,NPROC_XI-1-iproc_xi,iproc_eta)
+
+! check that total number of faces found is correct
+    if(imsg /= nb_msgs_theor_in_cube) then
+      print*,'error ',myrank,'nb_msgs_theor_in_cube:',nb_msgs_theor_in_cube,imsg
+      call exit_MPI(myrank,'wrong number of faces found for central cube')
+    endif
+
+  else
+
+! dummy value in slices
+    sender_from_slices_to_cube(1) = -1
+
+  endif
+
+
+! on chunks AB & AB ANTIPODE, receive all the messages from the slices (except the bottom one)
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+    do imsg = 1,nb_msgs_theor_in_cube-1
+
+    ! receive buffers from slices
+    sender = sender_from_slices_to_cube(imsg)
+    call MPI_RECV(buffer_slices, &
+              NDIM*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+              itag,MPI_COMM_WORLD,msg_status,ier)
+
+    ! copy buffer in 2D array for each slice
+    buffer_all_cube_from_slices(imsg,:,:) = buffer_slices(:,:)
+
+    enddo
+  endif
+
+  ! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE ) then
+
+    ! for bottom elements in contact with central cube from the slices side
+    ipoin = 0
+    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+
+      ispec = ibelm_bottom_inner_core(ispec2D)
+
+      ! only for DOFs exactly on surface of central cube (bottom of these elements)
+      k = 1
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ipoin = ipoin + 1
+          iglob = ibool_inner_core(i,j,k,ispec)
+          buffer_slices(ipoin,1) = dble(xstore_inner_core(iglob))
+          buffer_slices(ipoin,2) = dble(ystore_inner_core(iglob))
+          buffer_slices(ipoin,3) = dble(zstore_inner_core(iglob))
+        enddo
+      enddo
+    enddo
+
+    ! send buffer to central cube
+    receiver = receiver_cube_from_slices
+    call MPI_SEND(buffer_slices,NDIM*npoin2D_cube_from_slices, &
+              MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
+
+    ! daniel: in case NPROC_XI == 1, the other chunks exchange all bottom points with
+    ! CHUNK_AB **and** CHUNK_AB_ANTIPODE
+    if(NPROC_XI==1) then
+      call MPI_SEND(buffer_slices,NDIM*npoin2D_cube_from_slices, &
+                   MPI_DOUBLE_PRECISION, &
+                   addressing(CHUNK_AB_ANTIPODE,0,iproc_eta), &
+                   itag,MPI_COMM_WORLD,ier)
+    endif
+
+  endif  ! end sending info to central cube
+
+
+  ! exchange bottom faces between chunks AB and AB_ANTIPODE
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+    ipoin = 0
+    do ispec = NSPEC_INNER_CORE, 1, -1
+      if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
+        k = 1
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ipoin = ipoin + 1
+            iglob = ibool_inner_core(i,j,k,ispec)
+            buffer_slices(ipoin,1) = dble(xstore_inner_core(iglob))
+            buffer_slices(ipoin,2) = dble(ystore_inner_core(iglob))
+            buffer_slices(ipoin,3) = dble(zstore_inner_core(iglob))
+          enddo
+        enddo
+      endif
+    enddo
+    if (ipoin /= npoin2D_cube_from_slices) then
+      print*,'error',myrank,'bottom points:',npoin2D_cube_from_slices,ipoin
+      call exit_MPI(myrank,'wrong number of points found for bottom CC AB or !AB')
+    endif
+
+    sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
+
+    call MPI_SENDRECV(buffer_slices,NDIM*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
+        itag,buffer_slices2,NDIM*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+        itag,MPI_COMM_WORLD,msg_status,ier)
+
+    buffer_all_cube_from_slices(nb_msgs_theor_in_cube,:,:) = buffer_slices2(:,:)
+
+  endif
+
+  !--- now we need to find the points received and create indirect addressing
+  ibool_central_cube(:,:) = -1
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+   do imsg = 1,nb_msgs_theor_in_cube
+
+    do ipoin = 1,npoin2D_cube_from_slices
+
+      x_target = buffer_all_cube_from_slices(imsg,ipoin,1)
+      y_target = buffer_all_cube_from_slices(imsg,ipoin,2)
+      z_target = buffer_all_cube_from_slices(imsg,ipoin,3)
+
+      ! x = x_min
+      do ispec2D = 1,nspec2D_xmin_inner_core
+        ispec = ibelm_xmin_inner_core(ispec2D)
+        ! do not loop on elements outside of the central cube
+        if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
+          idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
+          idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
+        ! check
+        if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) stop 'error xmin ibelm'
+        i = 1
+        do k = 1,NGLLZ
+          do j = 1,NGLLY
+           iglob = ibool_inner_core(i,j,k,ispec)
+           x_current = dble(xstore_inner_core(iglob))
+           y_current = dble(ystore_inner_core(iglob))
+           z_current = dble(zstore_inner_core(iglob))
+           ! look for matching point
+           if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+             ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
+             goto 100
+           endif
+          enddo
+        enddo
+      enddo
+
+      ! x = x_max
+      do ispec2D = 1,nspec2D_xmax_inner_core
+        ispec = ibelm_xmax_inner_core(ispec2D)
+        ! do not loop on elements outside of the central cube
+        if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
+            idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
+            idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
+        ! check
+        if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) stop 'error xmax ibelm'
+        i = NGLLX
+        do k = 1,NGLLZ
+          do j = 1,NGLLY
+            iglob = ibool_inner_core(i,j,k,ispec)
+            x_current = dble(xstore_inner_core(iglob))
+            y_current = dble(ystore_inner_core(iglob))
+            z_current = dble(zstore_inner_core(iglob))
+            ! look for matching point
+            if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+              ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
+              goto 100
+            endif
+          enddo
+        enddo
+      enddo
+
+      ! y = y_min
+      do ispec2D = 1,nspec2D_ymin_inner_core
+        ispec = ibelm_ymin_inner_core(ispec2D)
+        ! do not loop on elements outside of the central cube
+        if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
+            idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
+            idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
+        ! check
+        if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) stop 'error ymin ibelm'
+        j = 1
+        do k = 1,NGLLZ
+          do i = 1,NGLLX
+            iglob = ibool_inner_core(i,j,k,ispec)
+            x_current = dble(xstore_inner_core(iglob))
+            y_current = dble(ystore_inner_core(iglob))
+            z_current = dble(zstore_inner_core(iglob))
+            ! look for matching point
+            if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+              ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
+              goto 100
+            endif
+          enddo
+        enddo
+      enddo
+
+      ! y = y_max
+      do ispec2D = 1,nspec2D_ymax_inner_core
+        ispec = ibelm_ymax_inner_core(ispec2D)
+        ! do not loop on elements outside of the central cube
+        if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
+            idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
+            idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
+        ! check
+        if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) stop 'error ymax ibelm'
+        j = NGLLY
+        do k = 1,NGLLZ
+          do i = 1,NGLLX
+            iglob = ibool_inner_core(i,j,k,ispec)
+            x_current = dble(xstore_inner_core(iglob))
+            y_current = dble(ystore_inner_core(iglob))
+            z_current = dble(zstore_inner_core(iglob))
+            ! look for matching point
+            if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+              ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
+              goto 100
+            endif
+          enddo
+        enddo
+      enddo
+
+      ! bottom of cube
+      do ispec = 1,NSPEC_INNER_CORE
+        ! loop on elements at the bottom of the cube only
+        if(idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE) cycle
+        k = 1
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            iglob = ibool_inner_core(i,j,k,ispec)
+            x_current = dble(xstore_inner_core(iglob))
+            y_current = dble(ystore_inner_core(iglob))
+            z_current = dble(zstore_inner_core(iglob))
+            ! look for matching point
+            if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+              ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
+              goto 100
+            endif
+          enddo
+        enddo
+      enddo
+
+      ! point not found so far
+      if(NPROC_XI==1) then
+        ! ignores point
+        ibool_central_cube(imsg,ipoin) = 0
+      else
+        ! check that a matching point is found in all cases
+        call exit_MPI(myrank,'point never found in central cube')
+      endif
+
+ 100  continue
+
+    enddo ! ipoin
+
+    ! checks ibool array
+    if(NPROC_XI==1) then
+      if( minval(ibool_central_cube(imsg,:)) < 0 ) call exit_mpi(myrank,'error ibool_central_cube point not found')
+
+      ! removes bottom-surface points from the messages sent by the other chunks
+      ! (the bottom face is already exchanged with chunk AB, so this avoids adding the same point twice)
+      if( ichunk == CHUNK_AB_ANTIPODE .and. imsg < nb_msgs_theor_in_cube ) then
+        do ipoin = 1,npoin2D_cube_from_slices
+          x_target = buffer_all_cube_from_slices(imsg,ipoin,1)
+          y_target = buffer_all_cube_from_slices(imsg,ipoin,2)
+          z_target = buffer_all_cube_from_slices(imsg,ipoin,3)
+
+          ! bottom of cube
+          do ispec = 1,NSPEC_INNER_CORE
+            ! loop on elements at the bottom of the cube only
+            if(idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE) cycle
+            k = 1
+            do j = 1,NGLLY
+              do i = 1,NGLLX
+                iglob = ibool_inner_core(i,j,k,ispec)
+                x_current = dble(xstore_inner_core(iglob))
+                y_current = dble(ystore_inner_core(iglob))
+                z_current = dble(zstore_inner_core(iglob))
+                ! look for matching point
+                if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+                  ibool_central_cube(imsg,ipoin) = 0
+                  goto 200
+                endif
+              enddo
+            enddo
+          enddo
+
+ 200      continue
+
+        enddo ! ipoin
+      endif
+
+    endif ! NPROC_XI==1
+
+   enddo ! imsg
+  endif
+
+  end subroutine create_central_cube_buffers
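
The indirect addressing above is built by brute-force coordinate matching: every point received in a buffer is compared against the GLL points on the faces and bottom of the central cube, and a match is accepted as soon as the Euclidean distance drops below SMALLVALTOL. A minimal standalone sketch of that matching idea, with invented array names and tolerance rather than the routine's actual interface:

  program match_points_sketch
    implicit none
    integer, parameter :: npoin_target = 4, npoin_local = 6
    double precision, parameter :: TOL = 1.d-10
    double precision :: xyz_local(3,npoin_local), xyz_target(3,npoin_target)
    integer :: imatch(npoin_target)
    integer :: ip, iq
    double precision :: dist

    ! invent local coordinates and reuse the first few of them as the "received" targets
    call random_number(xyz_local)
    xyz_target(:,:) = xyz_local(:,1:npoin_target)

    imatch(:) = -1
    do ip = 1,npoin_target
      do iq = 1,npoin_local
        dist = sqrt(sum((xyz_local(:,iq) - xyz_target(:,ip))**2))
        if (dist < TOL) then
          imatch(ip) = iq   ! indirect address of the matching local point
          exit              ! plays the role of the "goto 100" above
        endif
      enddo
      if (imatch(ip) < 0) stop 'point never found'
    enddo
    print *,'matches:',imatch
  end program match_points_sketch
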
+
+!
+!----------------------------------
+!
+
+  subroutine comp_central_cube_buffer_size(iproc_xi,iproc_eta,ichunk,NPROC_XI,NPROC_ETA,NSPEC2D_BOTTOM_INNER_CORE, &
+                nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
+
+!--- compute number of messages to expect in cube as well as their size
+!--- take into account vertical sides and bottom side
+
+  implicit none
+
+  include "constants.h"
+
+  integer, intent(in) :: iproc_xi,iproc_eta,ichunk,NPROC_XI,NPROC_ETA,NSPEC2D_BOTTOM_INNER_CORE
+
+  integer, intent(out) :: nb_msgs_theor_in_cube,npoin2D_cube_from_slices
+
+  integer :: nproc_xi_half_floor,nproc_xi_half_ceil
+
+  if( mod(NPROC_XI,2) /= 0 ) then
+    nproc_xi_half_floor = floor(NPROC_XI/2.d0)
+    nproc_xi_half_ceil = ceiling(NPROC_XI/2.d0)
+  else
+    nproc_xi_half_floor = NPROC_XI/2
+    nproc_xi_half_ceil = NPROC_XI/2
+  endif
+
+! only for slices in central cube
+  if(ichunk == CHUNK_AB) then
+    if(NPROC_XI == 1) then
+      ! five sides if only one processor in cube
+      nb_msgs_theor_in_cube = 5
+    else
+      ! case of a corner
+      if((iproc_xi == 0 .or. iproc_xi == NPROC_XI-1).and. &
+         (iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1)) then
+        ! slices on both "vertical" faces plus one slice at the bottom
+        nb_msgs_theor_in_cube = 2*(nproc_xi_half_ceil) + 1
+      ! case of an edge
+      else if(iproc_xi == 0 .or. iproc_xi == NPROC_XI-1 .or. &
+              iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1) then
+        ! slices on the "vertical" face plus one slice at the bottom
+        nb_msgs_theor_in_cube = nproc_xi_half_ceil + 1
+      else
+        ! bottom element only
+        nb_msgs_theor_in_cube = 1
+      endif
+    endif
+  else if(ichunk == CHUNK_AB_ANTIPODE) then
+    if(NPROC_XI == 1) then
+      ! five sides if only one processor in cube
+      nb_msgs_theor_in_cube = 5
+    else
+      ! case of a corner
+      if((iproc_xi == 0 .or. iproc_xi == NPROC_XI-1).and. &
+         (iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1)) then
+        ! slices on both "vertical" faces plus one slice at the bottom
+        nb_msgs_theor_in_cube = 2*(nproc_xi_half_floor) + 1
+      ! case of an edge
+      else if(iproc_xi == 0 .or. iproc_xi == NPROC_XI-1 .or. &
+              iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1) then
+        ! slices on the "vertical" face plus one slice at the bottom
+        nb_msgs_theor_in_cube = nproc_xi_half_floor + 1
+      else
+        ! bottom element only
+        nb_msgs_theor_in_cube = 1
+      endif
+    endif
+  else
+    ! not in the central cube chunks (AB or AB_ANTIPODE)
+    nb_msgs_theor_in_cube = 0
+  endif
+
+  ! number of points to send or receive (bottom of slices)
+  npoin2D_cube_from_slices = NSPEC2D_BOTTOM_INNER_CORE * NGLLX * NGLLY
+
+  end subroutine comp_central_cube_buffer_size
+
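
The counting rule coded above can be exercised on its own: for chunk AB, a corner slice expects messages from two "vertical" faces plus the bottom, an edge slice from one face plus the bottom, and an interior slice only the bottom one. A small hypothetical driver illustrating that rule (not part of the commit; the antipode chunk uses the floor instead of the ceiling of NPROC_XI/2):

  program count_msgs_sketch
    implicit none
    integer, parameter :: NPROC_XI = 4, NPROC_ETA = 4
    integer :: iproc_xi, iproc_eta, nb_msgs, nhalf_ceil

    nhalf_ceil = ceiling(NPROC_XI/2.d0)

    do iproc_eta = 0,NPROC_ETA-1
      do iproc_xi = 0,NPROC_XI-1
        if ((iproc_xi == 0 .or. iproc_xi == NPROC_XI-1) .and. &
            (iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1)) then
          nb_msgs = 2*nhalf_ceil + 1          ! corner slice: two vertical faces + bottom
        else if (iproc_xi == 0 .or. iproc_xi == NPROC_XI-1 .or. &
                 iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1) then
          nb_msgs = nhalf_ceil + 1            ! edge slice: one vertical face + bottom
        else
          nb_msgs = 1                         ! interior slice: bottom message only
        endif
        print *,'slice',iproc_xi,iproc_eta,'expects',nb_msgs,'messages'
      enddo
    enddo
  end program count_msgs_sketch
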

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_chunk_buffers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_chunk_buffers.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_chunk_buffers.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -39,52 +39,71 @@
 
   implicit none
 
-! standard include of the MPI library
+  include "constants.h"
+
+  ! standard include of the MPI library
   include 'mpif.h'
-
-  include "constants.h"
   include "precision.h"
 
-  integer, dimension(MAX_NUM_REGIONS,NB_SQUARE_CORNERS) :: NGLOB1D_RADIAL_CORNER
-
-  integer nglob,nglob_ori
-  integer NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX
-  integer NPROC,NPROC_XI,NPROC_ETA,NPROCTOT,NGLOB1D_RADIAL_MAX,NGLOB1D_RADIAL
-  integer NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
+  integer iregion_code
   integer nspec
-  integer myrank,NCHUNKS
 
-! arrays with the mesh
+  ! array with the local to global mapping per slice
+  integer ibool(NGLLX,NGLLY,NGLLZ,nspec)
+
+  integer idoubling(nspec)
+
+  ! arrays with the mesh
   double precision xstore(NGLLX,NGLLY,NGLLZ,nspec)
   double precision ystore(NGLLX,NGLLY,NGLLZ,nspec)
   double precision zstore(NGLLX,NGLLY,NGLLZ,nspec)
 
-  character(len=150) OUTPUT_FILES,LOCAL_PATH,ERR_MSG
+  integer nglob_ori
 
-! array with the local to global mapping per slice
-  integer ibool(NGLLX,NGLLY,NGLLZ,nspec)
+  integer NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX
+  integer NPROC,NPROC_XI,NPROC_ETA,NPROCTOT,NGLOB1D_RADIAL_MAX
+  integer NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
 
-  integer idoubling(nspec)
+  integer myrank
+  character(len=150) LOCAL_PATH
 
-! mask for ibool to mark points already found
+  integer addressing(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1)
+
+  integer ichunk_slice(0:NPROCTOT-1)
+  integer iproc_xi_slice(0:NPROCTOT-1)
+  integer iproc_eta_slice(0:NPROCTOT-1)
+
+  integer NCHUNKS
+  
+  ! local parameters
+  integer NGLOB1D_RADIAL
+
+  integer, dimension(MAX_NUM_REGIONS,NB_SQUARE_CORNERS) :: NGLOB1D_RADIAL_CORNER
+
+  integer nglob
+
+
+  character(len=150) OUTPUT_FILES,ERR_MSG
+
+  ! mask for ibool to mark points already found
   logical, dimension(:), allocatable ::  mask_ibool
 
-! array to store points selected for the chunk face buffer
+  ! array to store points selected for the chunk face buffer
   integer NGLOB2DMAX_XY
   integer, dimension(:), allocatable :: ibool_selected
 
   double precision, dimension(:), allocatable :: xstore_selected,ystore_selected,zstore_selected
 
-! arrays for sorting routine
+  ! arrays for sorting routine
   integer, dimension(:), allocatable :: ind,ninseg,iglob,locval,iwork
   logical, dimension(:), allocatable :: ifseg
   double precision, dimension(:), allocatable :: work
 
-! pairs generated theoretically
-! four sides for each of the three types of messages
+  ! pairs generated theoretically
+  ! four sides for each of the three types of messages
   integer, dimension(:), allocatable :: iproc_sender,iproc_receiver,npoin2D_send,npoin2D_receive
 
-! 1D buffers to remove points belonging to corners
+  ! 1D buffers to remove points belonging to corners
   integer ibool1D_leftxi_lefteta(NGLOB1D_RADIAL_MAX)
   integer ibool1D_rightxi_lefteta(NGLOB1D_RADIAL_MAX)
   integer ibool1D_leftxi_righteta(NGLOB1D_RADIAL_MAX)
@@ -96,7 +115,7 @@
   double precision xdummy,ydummy,zdummy
   integer ipoin1D
 
-! arrays to assemble the corners (3 processors for each corner)
+  ! arrays to assemble the corners (3 processors for each corner)
   integer, dimension(:,:), allocatable :: iprocscorners,itypecorner
 
   integer ichunk_send,iproc_xi_send,iproc_eta_send
@@ -105,12 +124,11 @@
   integer iproc_xi_loop_inv,iproc_eta_loop_inv
   integer imember_corner
 
-  integer iregion_code
 
   integer iproc_edge_send,iproc_edge_receive
   integer imsg_type,iside,imode_comm,iedge
 
-! boundary parameters per slice
+  ! boundary parameters per slice
   integer nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax, njunk
   integer ibelm_xmin(NSPEC2DMAX_XMIN_XMAX),ibelm_xmax(NSPEC2DMAX_XMIN_XMAX)
   integer ibelm_ymin(NSPEC2DMAX_YMIN_YMAX),ibelm_ymax(NSPEC2DMAX_YMIN_YMAX)
@@ -119,30 +137,25 @@
 
   integer i,j,k,ispec,ispec2D,ipoin2D,ier
 
-! current message number
+  ! current message number
   integer imsg
 
-! names of the data files for all the processors in MPI
+  ! names of the data files for all the processors in MPI
   character(len=150) prname,filename_in,filename_out
 
-! for addressing of the slices
+  ! for addressing of the slices
   integer ichunk,iproc_xi,iproc_eta,iproc
-  integer addressing(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1)
-  integer ichunk_slice(0:NPROCTOT-1)
-  integer iproc_xi_slice(0:NPROCTOT-1)
 
-  integer iproc_eta_slice(0:NPROCTOT-1)
-
-! this to avoid problem at compile time if less than six chunks
+  ! this to avoid problem at compile time if less than six chunks
   integer addressing_big(NCHUNKS_MAX,0:NPROC_XI-1,0:NPROC_ETA-1)
 
-! number of faces between chunks
+  ! number of faces between chunks
   integer NUM_FACES,NUMMSGS_FACES
 
-! number of corners between chunks
+  ! number of corners between chunks
   integer NCORNERSCHUNKS
 
-! number of message types
+  ! number of message types
   integer NUM_MSG_TYPES
 
   integer NPROC_ONE_DIRECTION

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_doubling_elements.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_doubling_elements.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_doubling_elements.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -352,7 +352,8 @@
         ! boundary mesh
         if (ipass == 2 .and. SAVE_BOUNDARY_MESH .and. iregion_code == IREGION_CRUST_MANTLE) then
           is_superbrick=.true.
-          call get_jacobian_discontinuities(myrank,ispec,ix_elem,iy_elem,rmin,rmax,r1,r2,r3,r4,r5,r6,r7,r8, &
+          call get_jacobian_discontinuities(myrank,ispec,ix_elem,iy_elem,rmin,rmax, &
+              r1,r2,r3,r4,r5,r6,r7,r8, &
               xstore(:,:,:,ispec),ystore(:,:,:,ispec),zstore(:,:,:,ispec),dershape2D_bottom, &
               ibelm_moho_top,ibelm_moho_bot,ibelm_400_top,ibelm_400_bot,ibelm_670_top,ibelm_670_bot, &
               normal_moho,normal_400,normal_670,jacobian2D_moho,jacobian2D_400,jacobian2D_670, &

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_mass_matrices.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -116,8 +116,9 @@
                           + xizl*(etaxl*gammayl-etayl*gammaxl))
 
           ! definition depends if region is fluid or solid
-          if(iregion_code == IREGION_CRUST_MANTLE .or. iregion_code == IREGION_INNER_CORE) then
+          select case( iregion_code)
 
+          case( IREGION_CRUST_MANTLE, IREGION_INNER_CORE )
             ! distinguish between single and double precision for reals
             if(CUSTOM_REAL == SIZE_REAL) then
               rmass(iglobnum) = rmass(iglobnum) + &
@@ -127,8 +128,8 @@
             endif
 
           ! fluid in outer core
-          else if(iregion_code == IREGION_OUTER_CORE) then
-
+          case( IREGION_OUTER_CORE )
+          
             ! no anisotropy in the fluid, use kappav
 
             ! distinguish between single and double precision for reals
@@ -140,10 +141,11 @@
                      jacobianl * weight * rhostore(i,j,k,ispec) / kappavstore(i,j,k,ispec)
             endif
 
-          else
+          case default
             call exit_MPI(myrank,'wrong region code')
-          endif
 
+          end select
+          
         enddo
       enddo
     enddo
@@ -205,8 +207,9 @@
           endif
 
           ! take into account inertia of water column
-          weight = wxgll(ix_oceans)*wygll(iy_oceans)*dble(jacobian2D_top(ix_oceans,iy_oceans,ispec2D_top_crust)) &
-                 * dble(RHO_OCEANS) * height_oceans
+          weight = wxgll(ix_oceans) * wygll(iy_oceans) &
+                    * dble(jacobian2D_top(ix_oceans,iy_oceans,ispec2D_top_crust)) &
+                    * dble(RHO_OCEANS) * height_oceans
 
           ! distinguish between single and double precision for reals
           if(CUSTOM_REAL == SIZE_REAL) then
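
The select case introduced above only restructures the existing branches: solid regions (crust/mantle and inner core) add rho * jacobian * weight to the mass matrix, while the fluid outer core adds rho / kappav * jacobian * weight. A schematic standalone version of that per-point contribution, with made-up values and names:

  program rmass_sketch
    implicit none
    integer, parameter :: IREGION_SOLID = 1, IREGION_FLUID = 2
    double precision :: jacobianl, weight, rho, kappa

    jacobianl = 0.5d0; weight = 1.2d0; rho = 3300.d0; kappa = 1.3d11

    print *,'solid mass-matrix contribution:', mass_term(IREGION_SOLID)
    print *,'fluid mass-matrix contribution:', mass_term(IREGION_FLUID)

  contains

    double precision function mass_term(iregion_code)
      integer, intent(in) :: iregion_code
      select case (iregion_code)
      case (IREGION_SOLID)
        ! solid regions: plain density weighting
        mass_term = jacobianl * weight * rho
      case (IREGION_FLUID)
        ! fluid outer core: density divided by kappa, as in the hunk above
        mass_term = jacobianl * weight * rho / kappa
      case default
        stop 'wrong region code'
      end select
    end function mass_term

  end program rmass_sketch
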

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_meshes.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_meshes.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_meshes.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -25,11 +25,21 @@
 !
 !=====================================================================
 
+
   subroutine create_meshes()
 
   use meshfem3D_par
   implicit none
-
+  
+  ! local parameters
+  ! parameters needed to store the radii of the grid points
+  ! in the spherically symmetric Earth
+  integer, dimension(:), allocatable :: idoubling
+  integer, dimension(:,:,:,:), allocatable :: ibool
+  ! arrays with the mesh in double precision
+  double precision, dimension(:,:,:,:), allocatable :: xstore,ystore,zstore
+  integer :: ier
+    
   ! get addressing for this process
   ichunk = ichunk_slice(myrank)
   iproc_xi = iproc_xi_slice(myrank)
@@ -70,15 +80,20 @@
     npointot = NSPEC(iregion_code) * NGLLX * NGLLY * NGLLZ
 
     ! use dynamic allocation to allocate memory for arrays
-    allocate(idoubling(NSPEC(iregion_code)))
-    allocate(ibool(NGLLX,NGLLY,NGLLZ,NSPEC(iregion_code)))
-    allocate(xstore(NGLLX,NGLLY,NGLLZ,NSPEC(iregion_code)))
-    allocate(ystore(NGLLX,NGLLY,NGLLZ,NSPEC(iregion_code)))
-    allocate(zstore(NGLLX,NGLLY,NGLLZ,NSPEC(iregion_code)))
+    allocate(idoubling(NSPEC(iregion_code)), &
+            ibool(NGLLX,NGLLY,NGLLZ,NSPEC(iregion_code)), &
+            xstore(NGLLX,NGLLY,NGLLZ,NSPEC(iregion_code)), &
+            ystore(NGLLX,NGLLY,NGLLZ,NSPEC(iregion_code)), &
+            zstore(NGLLX,NGLLY,NGLLZ,NSPEC(iregion_code)), &
+            stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating memory for arrays')
+    
+    ! this for non blocking MPI
+    allocate(is_on_a_slice_edge(NSPEC(iregion_code)), &
+            stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating is_on_a_slice_edge array')
+    
 
-  ! this for non blocking MPI
-    allocate(is_on_a_slice_edge(NSPEC(iregion_code)))
-
     ! create all the regions of the mesh
     ! perform two passes in this part to be able to save memory
     do ipass = 1,2
@@ -86,13 +101,12 @@
                           xstore,ystore,zstore,rmins,rmaxs, &
                           iproc_xi,iproc_eta,ichunk,NSPEC(iregion_code),nspec_tiso, &
                           volume_local,area_local_bottom,area_local_top, &
-                          nglob(iregion_code),npointot, &
-                          NSTEP,DT, &
+                          NGLOB(iregion_code),npointot, &
                           NEX_XI,NEX_PER_PROC_XI,NEX_PER_PROC_ETA, &
                           NSPEC2DMAX_XMIN_XMAX(iregion_code),NSPEC2DMAX_YMIN_YMAX(iregion_code), &
                           NSPEC2D_BOTTOM(iregion_code),NSPEC2D_TOP(iregion_code), &
-                          NPROC_XI,NPROC_ETA,NSPEC2D_XI_FACE, &
-                          NSPEC2D_ETA_FACE,NSPEC1D_RADIAL_CORNER,NGLOB1D_RADIAL_CORNER, &
+                          NPROC_XI,NPROC_ETA, &
+                          NSPEC2D_XI_FACE,NSPEC2D_ETA_FACE,NSPEC1D_RADIAL_CORNER,NGLOB1D_RADIAL_CORNER, &
                           myrank,LOCAL_PATH,rotation_matrix,ANGULAR_WIDTH_XI_RAD,ANGULAR_WIDTH_ETA_RAD, &
                           SAVE_MESH_FILES,NCHUNKS,INCLUDE_CENTRAL_CUBE,ABSORBING_CONDITIONS, &
                           R_CENTRAL_CUBE,RICB,RHO_OCEANS,RCMB,R670,RMOHO,RMOHO_FICTITIOUS_IN_MESHER,&
@@ -123,7 +137,7 @@
     if(NCHUNKS > 1) then
       call create_chunk_buffers(iregion_code,NSPEC(iregion_code),ibool,idoubling, &
                               xstore,ystore,zstore, &
-                              nglob(iregion_code), &
+                              NGLOB(iregion_code), &
                               NSPEC2DMAX_XMIN_XMAX(iregion_code),NSPEC2DMAX_YMIN_YMAX(iregion_code), &
                               NPROC_XI,NPROC_ETA,NPROC,NPROCTOT, &
                               NGLOB1D_RADIAL_CORNER,maxval(NGLOB1D_RADIAL_CORNER(iregion_code,:)), &
@@ -145,7 +159,7 @@
     deallocate(ystore)
     deallocate(zstore)
 
-  ! this for non blocking MPI
+    ! this for non blocking MPI
     deallocate(is_on_a_slice_edge)
 
     ! make sure everybody is synchronized
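
The allocations above now share a single stat=ier and abort through exit_MPI when it is nonzero, instead of relying on unchecked allocate calls. The same pattern in a minimal standalone form (exit_MPI replaced here by a plain stop, array sizes invented):

  program alloc_check_sketch
    implicit none
    integer :: ier, n
    double precision, dimension(:,:,:,:), allocatable :: xstore, ystore, zstore

    n = 1000   ! hypothetical number of spectral elements
    allocate(xstore(5,5,5,n), &
             ystore(5,5,5,n), &
             zstore(5,5,5,n), stat=ier)
    if (ier /= 0) stop 'error allocating memory for arrays'   ! the mesher calls exit_MPI here

    deallocate(xstore,ystore,zstore)
  end program alloc_check_sketch
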

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regions_mesh.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regions_mesh.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regions_mesh.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -25,17 +25,48 @@
 !
 !=====================================================================
 
+  module create_regions_mesh_par
+  
+  use constants,only: NGLLX,NGLLY,NGLLZ,NGNOD,NGNOD2D,NDIM,NDIM2D
+  
+  ! topology of the elements
+  integer, dimension(NGNOD) :: iaddx,iaddy,iaddz
+
+  ! Gauss-Lobatto-Legendre points and weights of integration
+  double precision, dimension(NGLLX) :: xigll,wxgll
+  double precision, dimension(NGLLY) :: yigll,wygll
+  double precision, dimension(NGLLZ) :: zigll,wzgll
+
+  ! 3D shape functions and their derivatives
+  double precision, dimension(NGNOD,NGLLX,NGLLY,NGLLZ) :: shape3D
+  double precision, dimension(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ) :: dershape3D
+
+  ! 2D shape functions and their derivatives
+  double precision, dimension(NGNOD2D,NGLLY,NGLLZ) :: shape2D_x
+  double precision, dimension(NGNOD2D,NGLLX,NGLLZ) :: shape2D_y
+  double precision, dimension(NGNOD2D,NGLLX,NGLLY) :: shape2D_bottom,shape2D_top
+  double precision, dimension(NDIM2D,NGNOD2D,NGLLY,NGLLZ) :: dershape2D_x
+  double precision, dimension(NDIM2D,NGNOD2D,NGLLX,NGLLZ) :: dershape2D_y
+  double precision, dimension(NDIM2D,NGNOD2D,NGLLX,NGLLY) :: dershape2D_bottom,dershape2D_top  
+  
+  end module create_regions_mesh_par
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
   subroutine create_regions_mesh(iregion_code,ibool,idoubling,is_on_a_slice_edge, &
                           xstore,ystore,zstore,rmins,rmaxs, &
-                          iproc_xi,iproc_eta,ichunk,nspec,nspec_tiso, &
+                          iproc_xi,iproc_eta,ichunk, &
+                          nspec,nspec_tiso, &
                           volume_local,area_local_bottom,area_local_top, &
                           nglob_theor,npointot, &
-                          NSTEP,DT, &
                           NEX_XI,NEX_PER_PROC_XI,NEX_PER_PROC_ETA, &
                           NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX, &
                           NSPEC2D_BOTTOM,NSPEC2D_TOP, &
-                          NPROC_XI,NPROC_ETA,NSPEC2D_XI_FACE, &
-                          NSPEC2D_ETA_FACE,NSPEC1D_RADIAL_CORNER,NGLOB1D_RADIAL_CORNER, &
+                          NPROC_XI,NPROC_ETA, &
+                          NSPEC2D_XI_FACE,NSPEC2D_ETA_FACE,NSPEC1D_RADIAL_CORNER,NGLOB1D_RADIAL_CORNER, &
                           myrank,LOCAL_PATH,rotation_matrix,ANGULAR_WIDTH_XI_RAD,ANGULAR_WIDTH_ETA_RAD,&
                           SAVE_MESH_FILES,NCHUNKS,INCLUDE_CENTRAL_CUBE,ABSORBING_CONDITIONS, &
                           R_CENTRAL_CUBE,RICB,RHO_OCEANS,RCMB,R670,RMOHO,RMOHO_FICTITIOUS_IN_MESHER,&
@@ -49,89 +80,87 @@
 ! creates the different regions of the mesh
 
   use meshfem3D_models_par
-
+  use create_regions_mesh_par  
   implicit none
 
-!****************************************************************************************************
-! Mila
+  ! code for the four regions of the mesh
+  integer :: iregion_code
 
-!  include "constants.h"
-! standard include of the MPI library
-  include 'mpif.h'
+  ! correct number of spectral elements in each block depending on chunk type
+  integer :: nspec,nspec_tiso
+  integer :: nspec_stacey,nspec_actually,nspec_att
 
-!****************************************************************************************************
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  integer, dimension(nspec) :: idoubling
 
-  ! this to cut the doubling brick
-  integer, dimension(MAX_NUM_REGIONS,NB_SQUARE_CORNERS) :: NSPEC1D_RADIAL_CORNER,NGLOB1D_RADIAL_CORNER
-  integer, dimension(MAX_NUM_REGIONS,NB_SQUARE_EDGES_ONEDIR) :: NSPEC2D_XI_FACE,NSPEC2D_ETA_FACE
-  logical :: CUT_SUPERBRICK_XI,CUT_SUPERBRICK_ETA
-  integer :: offset_proc_xi,offset_proc_eta
+  ! this for non blocking MPI
+  logical, dimension(nspec) :: is_on_a_slice_edge
 
-  integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: ner,ratio_sampling_array
-  double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: r_bottom,r_top
-  logical, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: this_region_has_a_doubling
+  ! arrays with the mesh in double precision
+  double precision xstore(NGLLX,NGLLY,NGLLZ,nspec)
+  double precision ystore(NGLLX,NGLLY,NGLLZ,nspec)
+  double precision zstore(NGLLX,NGLLY,NGLLZ,nspec)
 
-  integer :: ner_without_doubling,ilayer,ilayer_loop, &
-               ifirst_region,ilast_region,ratio_divide_central_cube
-  integer, dimension(:), allocatable :: perm_layer
+  ! meshing parameters
+  double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: rmins,rmaxs  
 
-  ! correct number of spectral elements in each block depending on chunk type
-  integer nspec,nspec_tiso,nspec_stacey,nspec_actually,nspec_att
+  integer :: iproc_xi,iproc_eta,ichunk
 
-  integer NEX_XI,NEX_PER_PROC_XI,NEX_PER_PROC_ETA,NCHUNKS
+  ! check area and volume of the final mesh
+  double precision :: area_local_bottom,area_local_top
+  double precision :: volume_local
 
-  integer NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX,NSPEC2D_BOTTOM,NSPEC2D_TOP
+  integer :: nglob_theor,npointot
 
-  integer NPROC_XI,NPROC_ETA
+  integer :: NEX_XI,NEX_PER_PROC_XI,NEX_PER_PROC_ETA
+  integer :: NSPEC2DMAX_XMIN_XMAX,NSPEC2DMAX_YMIN_YMAX,NSPEC2D_BOTTOM,NSPEC2D_TOP
+  integer :: NPROC_XI,NPROC_ETA
+  
+  ! this to cut the doubling brick
+  integer, dimension(MAX_NUM_REGIONS,NB_SQUARE_EDGES_ONEDIR) :: NSPEC2D_XI_FACE,NSPEC2D_ETA_FACE
+  integer, dimension(MAX_NUM_REGIONS,NB_SQUARE_CORNERS) :: NSPEC1D_RADIAL_CORNER,NGLOB1D_RADIAL_CORNER
 
-  integer npointot
+  ! proc numbers for MPI
+  integer myrank
 
-  logical SAVE_MESH_FILES
+  character(len=150) :: LOCAL_PATH
 
-  logical INCLUDE_CENTRAL_CUBE,ABSORBING_CONDITIONS
+  ! rotation matrix from Euler angles
+  double precision, dimension(NDIM,NDIM) :: rotation_matrix
 
-  double precision R_CENTRAL_CUBE,RICB,RCMB,R670,RMOHO, &
-          RTOPDDOUBLEPRIME,R600,R220,R771,R400,R120,R80,RMIDDLE_CRUST,ROCEAN, &
-          RMOHO_FICTITIOUS_IN_MESHER
+  double precision :: ANGULAR_WIDTH_XI_RAD,ANGULAR_WIDTH_ETA_RAD
 
-  double precision RHO_OCEANS
+  logical :: SAVE_MESH_FILES
+  
+  integer :: NCHUNKS
 
-  character(len=150) LOCAL_PATH,errmsg
+  logical :: INCLUDE_CENTRAL_CUBE,ABSORBING_CONDITIONS
 
-  ! arrays with the mesh in double precision
-  double precision xstore(NGLLX,NGLLY,NGLLZ,nspec)
-  double precision ystore(NGLLX,NGLLY,NGLLZ,nspec)
-  double precision zstore(NGLLX,NGLLY,NGLLZ,nspec)
+  double precision :: R_CENTRAL_CUBE,RICB
+  double precision :: RHO_OCEANS  
+  double precision :: RCMB,R670,RMOHO,RMOHO_FICTITIOUS_IN_MESHER, &
+    RTOPDDOUBLEPRIME,R600,R220,R771,R400,R120,R80,RMIDDLE_CRUST,ROCEAN
+    
+  integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: ner,ratio_sampling_array
+  integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: doubling_index
 
-  ! meshing parameters
-  double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: rmins,rmaxs
+  double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: r_bottom,r_top
+  logical, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: this_region_has_a_doubling
 
-  integer ibool(NGLLX,NGLLY,NGLLZ,nspec)
+  integer :: ratio_divide_central_cube
 
-  ! topology of the elements
-  integer, dimension(NGNOD) :: iaddx,iaddy,iaddz
+  logical :: CUT_SUPERBRICK_XI,CUT_SUPERBRICK_ETA
+  integer :: offset_proc_xi,offset_proc_eta
 
-  ! code for the four regions of the mesh
-  integer iregion_code
+  ! now perform two passes in this part to be able to save memory
+  integer :: ipass
 
-  ! Gauss-Lobatto-Legendre points and weights of integration
-  double precision, dimension(:), allocatable :: xigll,yigll,zigll,wxgll,wygll,wzgll
+  ! local parameters
 
-  ! 3D shape functions and their derivatives
-  double precision, dimension(:,:,:,:), allocatable :: shape3D
-  double precision, dimension(:,:,:,:,:), allocatable :: dershape3D
+  integer :: ner_without_doubling,ilayer,ilayer_loop, &
+               ifirst_region,ilast_region
+  integer, dimension(:), allocatable :: perm_layer
 
-  ! 2D shape functions and their derivatives
-  double precision, dimension(:,:,:), allocatable :: shape2D_x,shape2D_y, &
-    shape2D_bottom,shape2D_top
-  double precision, dimension(:,:,:,:), allocatable :: dershape2D_x,dershape2D_y, &
-    dershape2D_bottom,dershape2D_top
-
-  integer, dimension(nspec) :: idoubling
-
-! this for non blocking MPI
-  logical, dimension(nspec) :: is_on_a_slice_edge
-
   ! parameters needed to store the radii of the grid points in the spherically symmetric Earth
   double precision rmin,rmax
 
@@ -153,20 +182,13 @@
   real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: xixstore,xiystore,xizstore, &
     etaxstore,etaystore,etazstore,gammaxstore,gammaystore,gammazstore
 
-  ! proc numbers for MPI
-  integer myrank
-
-  ! check area and volume of the final mesh
-  double precision area_local_bottom,area_local_top
-  double precision volume_local
-
   ! variables for creating array ibool (some arrays also used for AVS or DX files)
   integer, dimension(:), allocatable :: locval
   logical, dimension(:), allocatable :: ifseg
   double precision, dimension(:), allocatable :: xp,yp,zp
+  integer :: nglob
+  integer :: ieoff,ilocnum,ier
 
-  integer nglob,nglob_theor,ieoff,ilocnum,ier
-
   ! mass matrix
   real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass
 
@@ -193,27 +215,16 @@
   integer, dimension(:,:), allocatable :: nimin,nimax,njmin,njmax,nkmin_xi,nkmin_eta
   real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: rho_vp,rho_vs
 
-  ! name of the database file
-  character(len=150) prname
 
   ! number of elements on the boundaries
   integer nspec2D_xmin,nspec2D_xmax,nspec2D_ymin,nspec2D_ymax
 
-  integer i,j,k,ispec
-  integer iproc_xi,iproc_eta,ichunk
-
-  double precision ANGULAR_WIDTH_XI_RAD,ANGULAR_WIDTH_ETA_RAD
-
-  ! rotation matrix from Euler angles
-  double precision, dimension(NDIM,NDIM) :: rotation_matrix
-
   ! attenuation
   double precision, dimension(:,:,:,:),   allocatable :: Qmu_store
   double precision, dimension(:,:,:,:,:), allocatable :: tau_e_store
   double precision, dimension(N_SLS)                  :: tau_s
   double precision  T_c_source
 
-  integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: doubling_index
   logical :: USE_ONE_LAYER_SB
 
   integer NUMBER_OF_MESH_LAYERS,layer_shift,cpt, &
@@ -223,35 +234,10 @@
 
   integer :: nb_layer_above_aniso,FIRST_ELT_ABOVE_ANISO
 
-  ! now perform two passes in this part to be able to save memory
-  integer :: ipass
-
   logical :: ACTUALLY_STORE_ARRAYS
 
-!****************************************************************************************************
-! Mila
-
-! added for color permutation
-  integer :: nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer
-  integer, dimension(:), allocatable :: perm
-  integer, dimension(:), allocatable :: first_elem_number_in_this_color
-  integer, dimension(:), allocatable :: num_of_elems_in_this_color
-
-  integer :: icolor,ispec_counter
-  integer :: nspec_outer_min_global,nspec_outer_max_global
-
-!****************************************************************************************************
-
-!///////////////////////////////////////////////////////////////////////////////
-!   Manh Ha - 18-11-2011
-!   Adding new variables
-
-  integer :: NSTEP
   integer, save :: npoin2D_xi,npoin2D_eta
-  double precision :: DT
 
-!///////////////////////////////////////////////////////////////////////////////
-
   ! Boundary Mesh
   integer NSPEC2D_MOHO,NSPEC2D_400,NSPEC2D_670,nex_eta_moho
   integer, dimension(:), allocatable :: ibelm_moho_top,ibelm_moho_bot,ibelm_400_top,ibelm_400_bot, &
@@ -265,6 +251,12 @@
   ! flags for transverse isotropic elements
   logical, dimension(:), allocatable :: ispec_is_tiso
 
+  integer i,j,k,ispec
+  
+  ! name of the database file
+  character(len=150) :: prname
+  character(len=150) :: errmsg
+
   ! user output
   if(myrank == 0 ) then
     if(ipass == 1 ) write(IMAIN,*) 'first pass'
@@ -274,6 +266,13 @@
   ! create the name for the database of the current slide and region
   call create_name_database(prname,myrank,iregion_code,LOCAL_PATH)
 
+! initializes arrays
+  call sync_all()
+  if( myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) '  ...allocating arrays '
+  endif
+
   ! New Attenuation definition on all GLL points
   ! Attenuation
   if (ATTENUATION) then
@@ -287,34 +286,6 @@
           tau_e_store(N_SLS,NGLLX,NGLLY,NGLLZ,nspec_att),stat=ier)
   if(ier /= 0) stop 'error in allocate 1'
 
-  ! Gauss-Lobatto-Legendre points of integration
-  allocate(xigll(NGLLX), &
-          yigll(NGLLY), &
-          zigll(NGLLZ),stat=ier)
-  if(ier /= 0) stop 'error in allocate 2'
-
-  ! Gauss-Lobatto-Legendre weights of integration
-  allocate(wxgll(NGLLX), &
-          wygll(NGLLY), &
-          wzgll(NGLLZ),stat=ier)
-  if(ier /= 0) stop 'error in allocate 3'
-
-  ! 3D shape functions and their derivatives
-  allocate(shape3D(NGNOD,NGLLX,NGLLY,NGLLZ), &
-          dershape3D(NDIM,NGNOD,NGLLX,NGLLY,NGLLZ),stat=ier)
-  if(ier /= 0) stop 'error in allocat 4'
-
-  ! 2D shape functions and their derivatives
-  allocate(shape2D_x(NGNOD2D,NGLLY,NGLLZ), &
-          shape2D_y(NGNOD2D,NGLLX,NGLLZ), &
-          shape2D_bottom(NGNOD2D,NGLLX,NGLLY), &
-          shape2D_top(NGNOD2D,NGLLX,NGLLY), &
-          dershape2D_x(NDIM2D,NGNOD2D,NGLLY,NGLLZ), &
-          dershape2D_y(NDIM2D,NGNOD2D,NGLLX,NGLLZ), &
-          dershape2D_bottom(NDIM2D,NGNOD2D,NGLLX,NGLLY), &
-          dershape2D_top(NDIM2D,NGNOD2D,NGLLX,NGLLY),stat=ier)
-  if(ier /= 0) stop 'error in allocate 5'
-
   ! array with model density
   allocate(rhostore(NGLLX,NGLLY,NGLLZ,nspec), &
           dvpstore(NGLLX,NGLLY,NGLLZ,nspec),stat=ier)
@@ -649,9 +620,9 @@
   ! check total number of spectral elements created
   if(ispec /= nspec) call exit_MPI(myrank,'ispec should equal nspec')
 
-! if any of these flags is true, the element is on a communication edge
-! this is not enough because it can also be in contact by an edge or a corner but not a full face
-! therefore we will have to fix array "is_on_a_slice_edge" later in the solver to take this into account
+  ! if any of these flags is true, the element is on a communication edge
+  ! this is not enough because it can also be in contact by an edge or a corner but not a full face
+  ! therefore we will have to fix array "is_on_a_slice_edge" later in the solver to take this into account
   is_on_a_slice_edge(:) = &
       iMPIcut_xi(1,:) .or. iMPIcut_xi(2,:) .or. &
       iMPIcut_eta(1,:) .or. iMPIcut_eta(2,:) .or. &
@@ -659,8 +630,8 @@
       iboun(3,:) .or. iboun(4,:) .or. &
       iboun(5,:) .or. iboun(6,:)
 
-! no need to count fictitious elements on the edges
-! for which communications cannot be overlapped with calculations
+  ! no need to count fictitious elements on the edges
+  ! for which communications cannot be overlapped with calculations
   where(idoubling == IFLAG_IN_FICTITIOUS_CUBE) is_on_a_slice_edge = .false.
 
   ! only create global addressing and the MPI buffers in the first pass
@@ -801,205 +772,11 @@
     !nspec_tiso = count(idoubling(1:nspec) == IFLAG_220_80) + count(idoubling(1:nspec) == IFLAG_80_MOHO)
     nspec_tiso = count(ispec_is_tiso(:))
 
-    !****************************************************************************************************
-    ! Mila
+    ! mesh sorting and coloring
+    call setup_color_perm(myrank,iregion_code,nspec,nglob, &
+                              ibool,is_on_a_slice_edge,prname, &
+                              npoin2D_xi,npoin2D_eta)
 
-    if(SORT_MESH_INNER_OUTER) then
-
-      !!!! David Michea: detection of the edges, coloring and permutation separately
-      allocate(perm(nspec))
-
-      ! implement mesh coloring for GPUs if needed, to create subsets of disconnected elements
-      ! to remove dependencies and the need for atomic operations in the sum of elemental contributions in the solver
-      if(USE_MESH_COLORING_GPU) then
-
-        ! user output
-        if(myrank == 0 ) write(IMAIN,*) '  creating mesh coloring'
-
-        allocate(first_elem_number_in_this_color(MAX_NUMBER_OF_COLORS + 1))
-
-        call get_perm_color_faster(is_on_a_slice_edge,ibool,perm,nspec,nglob, &
-          nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer,first_elem_number_in_this_color,myrank)
-
-        ! for the last color, the next color is fictitious and its first (fictitious) element number is nspec + 1
-        first_elem_number_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements + 1) = nspec + 1
-
-        allocate(num_of_elems_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements))
-
-        ! save mesh coloring
-        open(unit=99,file=prname(1:len_trim(prname))//'num_of_elems_in_this_color.dat',status='unknown')
-
-        ! number of colors for outer elements
-        write(99,*) nb_colors_outer_elements
-
-        ! number of colors for inner elements
-        write(99,*) nb_colors_inner_elements
-
-        ! number of elements in each color
-        do icolor = 1, nb_colors_outer_elements + nb_colors_inner_elements
-          num_of_elems_in_this_color(icolor) = first_elem_number_in_this_color(icolor+1) &
-                                              - first_elem_number_in_this_color(icolor)
-          write(99,*) num_of_elems_in_this_color(icolor)
-        enddo
-        close(99)
-
-        ! check that the sum of all the numbers of elements found in each color is equal
-        ! to the total number of elements in the mesh
-        if(sum(num_of_elems_in_this_color) /= nspec) then
-          print *,'nspec = ',nspec
-          print *,'total number of elements in all the colors of the mesh = ',sum(num_of_elems_in_this_color)
-          stop 'incorrect total number of elements in all the colors of the mesh'
-        endif
-
-        ! check that the sum of all the numbers of elements found in each color for the outer elements is equal
-        ! to the total number of outer elements found in the mesh
-        if(sum(num_of_elems_in_this_color(1:nb_colors_outer_elements)) /= nspec_outer) then
-          print *,'nspec_outer = ',nspec_outer
-          print *,'total number of elements in all the colors of the mesh for outer elements = ', &
-            sum(num_of_elems_in_this_color)
-          stop 'incorrect total number of elements in all the colors of the mesh for outer elements'
-        endif
-
-        call MPI_ALLREDUCE(nspec_outer,nspec_outer_min_global,1,MPI_INTEGER,MPI_MIN,MPI_COMM_WORLD,ier)
-        call MPI_ALLREDUCE(nspec_outer,nspec_outer_max_global,1,MPI_INTEGER,MPI_MAX,MPI_COMM_WORLD,ier)
-
-        deallocate(first_elem_number_in_this_color)
-        deallocate(num_of_elems_in_this_color)
-
-      else
-
-        !! DK DK for regular C + MPI version for CPUs: do not use colors but nonetheless put all the outer elements
-        !! DK DK first in order to be able to overlap non-blocking MPI communications with calculations
-
-        !! DK DK nov 2010, for Rosa Badia / StarSs:
-        !! no need for mesh coloring, but need to implement inner/outer subsets for non blocking MPI for StarSs
-        ispec_counter = 0
-        perm(:) = 0
-
-        ! first generate all the outer elements
-        do ispec = 1,nspec
-          if(is_on_a_slice_edge(ispec)) then
-            ispec_counter = ispec_counter + 1
-            perm(ispec) = ispec_counter
-          endif
-        enddo
-
-        ! make sure we have detected some outer elements
-        if(ispec_counter <= 0) stop 'fatal error: no outer elements detected!'
-
-        ! store total number of outer elements
-        nspec_outer = ispec_counter
-
-        ! then generate all the inner elements
-        do ispec = 1,nspec
-          if(.not. is_on_a_slice_edge(ispec)) then
-            ispec_counter = ispec_counter + 1
-            perm(ispec) = ispec_counter
-          endif
-        enddo
-
-        ! test that all the elements have been used once and only once
-        if(ispec_counter /= nspec) stop 'fatal error: ispec_counter not equal to nspec'
-
-        ! do basic checks
-        if(minval(perm) /= 1) stop 'minval(perm) should be 1'
-        if(maxval(perm) /= nspec) stop 'maxval(perm) should be nspec'
-
-        call MPI_ALLREDUCE(nspec_outer,nspec_outer_min_global,1,MPI_INTEGER,MPI_MIN,MPI_COMM_WORLD,ier)
-        call MPI_ALLREDUCE(nspec_outer,nspec_outer_max_global,1,MPI_INTEGER,MPI_MAX,MPI_COMM_WORLD,ier)
-
-      endif ! USE_MESH_COLORING_GPU
-
-      !! DK DK and Manh Ha, Nov 2011: added this to use the new mesher in the CUDA or C / StarSs test codes
-
-      if (myrank == 0 .and. iregion_code == IREGION_CRUST_MANTLE) then
-        ! write a header file for the Fortran version of the solver
-        open(unit=99,file=prname(1:len_trim(prname))//'values_from_mesher_f90.h',status='unknown')
-        write(99,*) 'integer, parameter :: NSPEC = ',nspec
-        write(99,*) 'integer, parameter :: NGLOB = ',nglob
-        !!! DK DK use 1000 time steps only for the scaling tests
-        write(99,*) 'integer, parameter :: NSTEP = 1000 !!!!!!!!!!! ',nstep
-        write(99,*) 'real(kind=4), parameter :: deltat = ',DT
-        write(99,*)
-        write(99,*) 'integer, parameter ::  NGLOB2DMAX_XMIN_XMAX = ',npoin2D_xi
-        write(99,*) 'integer, parameter ::  NGLOB2DMAX_YMIN_YMAX = ',npoin2D_eta
-        write(99,*) 'integer, parameter ::  NGLOB2DMAX_ALL = ',max(npoin2D_xi,npoin2D_eta)
-        write(99,*) 'integer, parameter ::  NPROC_XI = ',NPROC_XI
-        write(99,*) 'integer, parameter ::  NPROC_ETA = ',NPROC_ETA
-        write(99,*)
-        write(99,*) '! element number of the source and of the station'
-        write(99,*) '! after permutation of the elements by mesh coloring'
-        write(99,*) '! and inner/outer set splitting in the mesher'
-        write(99,*) 'integer, parameter :: NSPEC_SOURCE = ',perm(NSPEC/3)
-        write(99,*) 'integer, parameter :: RANK_SOURCE = 0'
-        write(99,*)
-        write(99,*) 'integer, parameter :: RANK_STATION = (NPROC_XI*NPROC_ETA - 1)'
-        write(99,*) 'integer, parameter :: NSPEC_STATION = ',perm(2*NSPEC/3)
-
-        ! save coordinates of the seismic source
-        !   write(99,*) xstore(2,2,2,10);
-        !   write(99,*) ystore(2,2,2,10);
-        !   write(99,*) zstore(2,2,2,10);
-
-        ! save coordinates of the seismic station
-        !   write(99,*) xstore(2,2,2,nspec-10);
-        !   write(99,*) ystore(2,2,2,nspec-10);
-        !   write(99,*) zstore(2,2,2,nspec-10);
-        close(99)
-
-        !! write a header file for the C version of the solver
-        open(unit=99,file=prname(1:len_trim(prname))//'values_from_mesher_C.h',status='unknown')
-        write(99,*) '#define NSPEC ',nspec
-        write(99,*) '#define NGLOB ',nglob
-        !!    write(99,*) '#define NSTEP ',nstep
-        !!! DK DK use 1000 time steps only for the scaling tests
-        write(99,*) '// #define NSTEP ',nstep
-        write(99,*) '#define NSTEP 1000'
-        ! put an "f" at the end to force single precision
-        write(99,"('#define deltat ',e18.10,'f')") DT
-        write(99,*) '#define NGLOB2DMAX_XMIN_XMAX ',npoin2D_xi
-        write(99,*) '#define NGLOB2DMAX_YMIN_YMAX ',npoin2D_eta
-        write(99,*) '#define NGLOB2DMAX_ALL ',max(npoin2D_xi,npoin2D_eta)
-        write(99,*) '#define NPROC_XI ',NPROC_XI
-        write(99,*) '#define NPROC_ETA ',NPROC_ETA
-        write(99,*)
-        write(99,*) '// element and MPI slice number of the source and the station'
-        write(99,*) '// after permutation of the elements by mesh coloring'
-        write(99,*) '// and inner/outer set splitting in the mesher'
-        write(99,*) '#define RANK_SOURCE 0'
-        write(99,*) '#define NSPEC_SOURCE ',perm(NSPEC/3)
-        write(99,*)
-        write(99,*) '#define RANK_STATION (NPROC_XI*NPROC_ETA - 1)'
-        write(99,*) '#define NSPEC_STATION ',perm(2*NSPEC/3)
-        close(99)
-
-        open(unit=99,file=prname(1:len_trim(prname))//'values_from_mesher_nspec_outer.h',status='unknown')
-        write(99,*) '#define NSPEC_OUTER ',nspec_outer_max_global
-        write(99,*) '// NSPEC_OUTER_min = ',nspec_outer_min_global
-        write(99,*) '// NSPEC_OUTER_max = ',nspec_outer_max_global
-        close(99)
-
-      endif
-
-      !! DK DK and Manh Ha, Nov 2011: added this to use the new mesher in the CUDA or C / StarSs test codes
-
-      deallocate(perm)
-
-    else
-      print *,'SORT_MESH_INNER_OUTER must always been set to .true. even for the regular C version for CPUs'
-      print *,'in order to be able to use non blocking MPI to overlap communications'
-      !   print *,'generating identity permutation'
-      !   do ispec = 1,nspec
-      !     perm(ispec) = ispec
-      !   enddo
-      stop 'please set SORT_MESH_INNER_OUTER to .true. and recompile the whole code'
-
-    endif ! SORT_MESH_INNER_OUTER
-
-    !!!! David Michea: end of mesh coloring
-
-    !****************************************************************************************************
-
     ! precomputes jacobian for 2d absorbing boundary surfaces
     call get_jacobian_boundaries(myrank,iboun,nspec,xstore,ystore,zstore, &
               dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top, &
@@ -1120,33 +897,10 @@
   deallocate(muvstore,muhstore)
   deallocate(eta_anisostore)
   deallocate(ispec_is_tiso)
-  deallocate(c11store)
-  deallocate(c12store)
-  deallocate(c13store)
-  deallocate(c14store)
-  deallocate(c15store)
-  deallocate(c16store)
-  deallocate(c22store)
-  deallocate(c23store)
-  deallocate(c24store)
-  deallocate(c25store)
-  deallocate(c26store)
-  deallocate(c33store)
-  deallocate(c34store)
-  deallocate(c35store)
-  deallocate(c36store)
-  deallocate(c44store)
-  deallocate(c45store)
-  deallocate(c46store)
-  deallocate(c55store)
-  deallocate(c56store)
-  deallocate(c66store)
+  deallocate(c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
+            c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
+            c36store,c44store,c45store,c46store,c55store,c56store,c66store)
   deallocate(iboun)
-  deallocate(xigll,yigll,zigll)
-  deallocate(wxgll,wygll,wzgll)
-  deallocate(shape3D,dershape3D)
-  deallocate(shape2D_x,shape2D_y,shape2D_bottom,shape2D_top)
-  deallocate(dershape2D_x,dershape2D_y,dershape2D_bottom,dershape2D_top)
   deallocate(ibelm_xmin,ibelm_xmax,ibelm_ymin,ibelm_ymax)
   deallocate(ibelm_bottom,ibelm_top)
   deallocate(jacobian2D_xmin,jacobian2D_xmax,jacobian2D_ymin,jacobian2D_ymax)
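
The inline block removed above is replaced by a single call to setup_color_perm. Its non-colored branch, as shown in the deleted lines, simply renumbers the elements so that all elements on a slice edge come first, which lets the solver compute the outer elements, post the non-blocking MPI messages, and then overlap the inner elements with the communications. A compact sketch of that outer-first renumbering on an invented toy mesh:

  program outer_first_perm_sketch
    implicit none
    integer, parameter :: nspec = 8
    logical :: is_on_a_slice_edge(nspec)
    integer :: perm(nspec), ispec, ispec_counter

    ! mark a few elements as touching an MPI interface
    is_on_a_slice_edge(:) = .false.
    is_on_a_slice_edge(2) = .true.
    is_on_a_slice_edge(7) = .true.

    ! outer (edge) elements get the lowest new numbers ...
    ispec_counter = 0
    perm(:) = 0
    do ispec = 1,nspec
      if (is_on_a_slice_edge(ispec)) then
        ispec_counter = ispec_counter + 1
        perm(ispec) = ispec_counter
      endif
    enddo

    ! ... then the inner elements follow
    do ispec = 1,nspec
      if (.not. is_on_a_slice_edge(ispec)) then
        ispec_counter = ispec_counter + 1
        perm(ispec) = ispec_counter
      endif
    enddo

    if (ispec_counter /= nspec) stop 'permutation is not a bijection'
    print *,'perm:',perm
  end program outer_first_perm_sketch
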

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regular_elements.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regular_elements.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/create_regular_elements.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -272,7 +272,8 @@
         if (ipass == 2 .and. SAVE_BOUNDARY_MESH .and. iregion_code == IREGION_CRUST_MANTLE) then
           is_superbrick=.false.
           ispec_superbrick=0
-          call get_jacobian_discontinuities(myrank,ispec,ix_elem,iy_elem,rmin,rmax,r1,r2,r3,r4,r5,r6,r7,r8, &
+          call get_jacobian_discontinuities(myrank,ispec,ix_elem,iy_elem,rmin,rmax, &
+                   r1,r2,r3,r4,r5,r6,r7,r8, &
                    xstore(:,:,:,ispec),ystore(:,:,:,ispec),zstore(:,:,:,ispec),dershape2D_bottom, &
                    ibelm_moho_top,ibelm_moho_bot,ibelm_400_top,ibelm_400_bot,ibelm_670_top,ibelm_670_bot, &
                    normal_moho,normal_400,normal_670,jacobian2D_moho,jacobian2D_400,jacobian2D_670, &

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/fix_non_blocking_flags.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/fix_non_blocking_flags.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/fix_non_blocking_flags.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,187 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! fix the non-blocking arrays used to assemble the slices inside each chunk: elements
+! in contact with the MPI faces only by an edge or a corner, but not by a full face,
+! are missing, therefore we add them here
+  subroutine fix_non_blocking_slices(is_on_a_slice_edge,iboolright_xi, &
+         iboolleft_xi,iboolright_eta,iboolleft_eta, &
+         npoin2D_xi,npoin2D_eta,ibool, &
+         mask_ibool,nspec,nglob,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
+
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi,npoin2D_eta
+
+  logical, dimension(nspec) :: is_on_a_slice_edge
+
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX) :: iboolleft_xi,iboolright_xi
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX) :: iboolleft_eta,iboolright_eta
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+  logical, dimension(nglob) :: mask_ibool
+
+  integer :: ipoin,ispec,i,j,k
+
+! clean the mask
+  mask_ibool(:) = .false.
+
+! mark all the points that are in the MPI buffers to assemble inside each chunk
+  do ipoin = 1,npoin2D_xi(1)
+    mask_ibool(iboolleft_xi(ipoin)) = .true.
+  enddo
+
+  do ipoin = 1,npoin2D_eta(1)
+    mask_ibool(iboolleft_eta(ipoin)) = .true.
+  enddo
+
+  do ipoin = 1,npoin2D_xi(2)
+    mask_ibool(iboolright_xi(ipoin)) = .true.
+  enddo
+
+  do ipoin = 1,npoin2D_eta(2)
+    mask_ibool(iboolright_eta(ipoin)) = .true.
+  enddo
+
+! now label all the elements that have at least one corner belonging
+! to any of these buffers as elements that must contribute to the
+! first step of the calculations (performed on the edges before starting
+! the non blocking communications); there is no need to examine the inside
+! of the elements, checking their eight corners is sufficient
+  do ispec = 1,nspec
+    do k = 1,NGLLZ,NGLLZ-1
+      do j  = 1,NGLLY,NGLLY-1
+        do i = 1,NGLLX,NGLLX-1
+          if(mask_ibool(ibool(i,j,k,ispec))) then
+            is_on_a_slice_edge(ispec) = .true.
+            goto 888
+          endif
+        enddo
+      enddo
+    enddo
+  888 continue
+  enddo
+
+  end subroutine fix_non_blocking_slices
+
+!-------------------------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------------------------
+
+! fix the non-blocking arrays used to assemble the central cube: elements
+! in contact with the MPI faces only through an edge or a corner, but not
+! through a full face, are missing, so we add them here
+  subroutine fix_non_blocking_central_cube(is_on_a_slice_edge, &
+         ibool,nspec,nglob,nb_msgs_theor_in_cube,ibelm_bottom_inner_core, &
+         idoubling_inner_core,npoin2D_cube_from_slices, &
+         ibool_central_cube,NSPEC2D_BOTTOM_INNER_CORE, &
+         ichunk,NPROC_XI)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob,nb_msgs_theor_in_cube,NSPEC2D_BOTTOM_INNER_CORE
+  integer :: ichunk,npoin2D_cube_from_slices,NPROC_XI
+
+  logical, dimension(nspec) :: is_on_a_slice_edge
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices) :: ibool_central_cube
+
+  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+
+! doubling flags of the inner core elements (central cube, fictitious cube, ...)
+  integer, dimension(nspec) :: idoubling_inner_core
+
+! local work array: marks the global points that belong to the central cube
+! MPI buffers, so that elements touching them can be flagged as edge elements
+  logical, dimension(nglob) :: mask_ibool
+
+  integer :: ipoin,ispec,i,j,k,imsg,ispec2D
+
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+      ispec = ibelm_bottom_inner_core(ispec2D)
+      is_on_a_slice_edge(ispec) = .true.
+    enddo
+  endif
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+    do ispec = 1,nspec
+      if(idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE .or. &
+         idoubling_inner_core(ispec) == IFLAG_TOP_CENTRAL_CUBE) &
+        is_on_a_slice_edge(ispec) = .true.
+    enddo
+  endif
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+    ! clean the mask
+    mask_ibool(:) = .false.
+
+    do imsg = 1,nb_msgs_theor_in_cube
+      do ipoin = 1,npoin2D_cube_from_slices
+        if(NPROC_XI==1) then
+          if(ibool_central_cube(imsg,ipoin) > 0 ) then
+            mask_ibool(ibool_central_cube(imsg,ipoin)) = .true.
+          endif
+        else
+          mask_ibool(ibool_central_cube(imsg,ipoin)) = .true.
+        endif
+      enddo
+    enddo
+
+    ! now label all the elements that have at least one corner belonging
+    ! to any of these buffers as elements that must contribute to the
+    ! first step of the calculations (performed on the edges before starting
+    ! the non blocking communications); there is no need to examine the inside
+    ! of the elements, checking their eight corners is sufficient
+    do ispec = 1,nspec
+      do k = 1,NGLLZ,NGLLZ-1
+        do j  = 1,NGLLY,NGLLY-1
+          do i = 1,NGLLX,NGLLX-1
+            if(mask_ibool(ibool(i,j,k,ispec))) then
+              is_on_a_slice_edge(ispec) = .true.
+              goto 888
+            endif
+          enddo
+        enddo
+      enddo
+    888 continue
+    enddo
+
+  endif
+
+  end subroutine fix_non_blocking_central_cube
+
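Note on the loop idiom used in both routines above: the stride "do k = 1,NGLLZ,NGLLZ-1" visits only the first and last GLL indices, so the triple loop touches exactly the eight corners of each element. A minimal, self-contained sketch of this trick (the parameter values below are illustrative only, not taken from constants.h):

  program corner_stride_demo
    implicit none
    ! hypothetical GLL sizes, for illustration only
    integer, parameter :: NGLLX = 5, NGLLY = 5, NGLLZ = 5
    integer :: i,j,k,ncorners
    ncorners = 0
    ! stride NGLL-1 selects only indices 1 and NGLL in each direction
    do k = 1,NGLLZ,NGLLZ-1
      do j = 1,NGLLY,NGLLY-1
        do i = 1,NGLLX,NGLLX-1
          ncorners = ncorners + 1
        enddo
      enddo
    enddo
    print *,'corners visited = ',ncorners   ! prints 8
  end program corner_stride_demo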

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/get_MPI_interfaces.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/get_MPI_interfaces.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/get_MPI_interfaces.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,733 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+  subroutine get_MPI_interfaces(myrank,NGLOB,NSPEC, &
+                                    test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
+                                    num_interfaces,max_nibool_interfaces, &
+                                    max_nibool,MAX_NEIGHBOURS, &
+                                    ibool,&
+                                    is_on_a_slice_edge, &
+                                    IREGION,add_central_cube,idoubling,INCLUDE_CENTRAL_CUBE, &
+                                    xstore,ystore,zstore,NPROCTOT)
+
+  use constants,only: CUSTOM_REAL,NGLLX,NGLLY,NGLLZ,IREGION_INNER_CORE,IFLAG_IN_FICTITIOUS_CUBE
+  implicit none
+
+  integer,intent(in) :: myrank,NGLOB,NSPEC
+
+  real(kind=CUSTOM_REAL),dimension(NGLOB),intent(in) :: test_flag
+
+  integer,intent(in) :: max_nibool
+  integer,intent(in) :: MAX_NEIGHBOURS
+  integer, dimension(MAX_NEIGHBOURS),intent(inout) :: my_neighbours,nibool_neighbours
+  integer, dimension(max_nibool,MAX_NEIGHBOURS),intent(inout) :: ibool_neighbours
+
+  integer,intent(out) :: num_interfaces,max_nibool_interfaces
+
+  integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool
+
+  logical,dimension(NSPEC),intent(inout) :: is_on_a_slice_edge
+
+  integer,intent(in) :: IREGION
+  logical,intent(in) :: add_central_cube
+  integer,dimension(NSPEC),intent(in) :: idoubling
+
+  logical,intent(in) :: INCLUDE_CENTRAL_CUBE
+
+  real(kind=CUSTOM_REAL),dimension(NGLOB),intent(in) :: xstore,ystore,zstore
+
+  integer,intent(in) :: NPROCTOT
+  
+  ! local parameters
+  integer :: ispec,iglob,j,k
+  integer :: iface,iedge,icorner
+  integer :: ii,iinterface,icurrent,rank
+  integer :: npoin
+  logical :: is_done,ispec_is_outer
+  integer,dimension(NGLOB) :: work_test_flag
+  logical,dimension(NSPEC) :: work_ispec_is_outer
+
+  integer,parameter :: MID = (NGLLX+1)/2
+
+  ! initializes
+  if( add_central_cube) then
+    ! adds points to existing inner_core interfaces
+    iinterface = num_interfaces
+    work_ispec_is_outer(:) = is_on_a_slice_edge(:)
+  else
+    ! creates new interfaces
+    iinterface = 0
+    num_interfaces = 0
+    max_nibool_interfaces = 0
+    my_neighbours(:) = -1
+    nibool_neighbours(:) = 0
+    ibool_neighbours(:,:) = 0
+    work_ispec_is_outer(:) = .false.
+  endif
+
+  ! makes working copy (converted to nearest integers)
+  work_test_flag(:) = nint( test_flag(:) )
+
+  ! loops over all elements
+  do ispec = 1,NSPEC
+
+    ! exclude elements in inner part of slice
+    !if( .not. is_on_a_slice_edge(ispec) ) cycle
+
+    ! exclude elements in fictitious core
+    if( IREGION == IREGION_INNER_CORE) then
+      if( idoubling(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) cycle
+    endif
+
+    ! sets flag if element has global points shared with other processes
+    ispec_is_outer = .false.
+
+    ! 1. finds neighbours which share a whole face with this process
+    ! (faces are shared only with 1 other neighbour process)
+
+    ! loops over all faces of element
+    do iface = 1, 6
+
+      ! chooses a point inside face
+      select case( iface )
+      case( 1 )
+        ! face I == 1
+        iglob = ibool(1,MID,MID,ispec)
+      case( 2 )
+        ! face I == NGLLX
+        iglob = ibool(NGLLX,MID,MID,ispec)
+      case( 3 )
+        ! face J == 1
+        iglob = ibool(MID,1,MID,ispec)
+      case( 4 )
+        ! face J == NGLLY
+        iglob = ibool(MID,NGLLY,MID,ispec)
+      case( 5 )
+        ! face K == 1
+        iglob = ibool(MID,MID,1,ispec)
+      case( 6 )
+        ! face K == NGLLZ
+        iglob = ibool(MID,MID,NGLLZ,ispec)
+      end select
+
+      ! checks assembled flag on global point
+      if( work_test_flag(iglob) > 0 ) then
+        ispec_is_outer = .true.
+
+        ! rank of neighbor process
+        rank = work_test_flag(iglob) - 1
+
+        ! checks ranks range
+        if( rank < 0 .or. rank >= NPROCTOT ) then
+          print*,'error face rank: ',myrank,'ispec=',ispec
+          print*,'  neighbor rank = ',rank,'exceeds total nproc:',NPROCTOT
+          print*,'  face ',iface
+          call exit_mpi(myrank,'error face neighbor mpi rank')
+        endif
+
+        ! checks if already stored
+        icurrent = 0
+        is_done = .false.
+        do ii = 1,iinterface
+          if( rank == my_neighbours(ii) ) then
+            icurrent = ii
+            is_done = .true.
+            exit
+          endif
+        enddo
+
+        ! updates interfaces array
+        if( .not. is_done ) then
+          iinterface = iinterface + 1
+          if( iinterface > MAX_NEIGHBOURS ) &
+            call exit_mpi(myrank,'interface face exceeds MAX_NEIGHBOURS range')
+          ! adds as neighbor new interface
+          my_neighbours(iinterface) = rank
+          icurrent = iinterface
+        endif
+        if( icurrent == 0 ) &
+          call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
+
+        ! adds interface points and removes neighbor flag from face
+        ! assumes NGLLX == NGLLY == NGLLZ
+        do k=1,NGLLX
+          do j=1,NGLLX
+            select case( iface )
+            case( 1 )
+              ! face I == 1
+              iglob = ibool(1,j,k,ispec)
+            case( 2 )
+              ! face I == NGLLX
+              iglob = ibool(NGLLX,j,k,ispec)
+            case( 3 )
+              ! face J == 1
+              iglob = ibool(j,1,k,ispec)
+            case( 4 )
+              ! face J == NGLLY
+              iglob = ibool(j,NGLLY,k,ispec)
+            case( 5 )
+              ! face K == 1
+              iglob = ibool(j,k,1,ispec)
+            case( 6 )
+              ! face K == NGLLZ
+              iglob = ibool(j,k,NGLLZ,ispec)
+            end select
+
+            ! checks that we take each global point (on edges and corners) only once
+            call add_interface_point(iglob,rank,icurrent, &
+                                        nibool_neighbours,MAX_NEIGHBOURS, &
+                                        ibool_neighbours,max_nibool, &
+                                        work_test_flag,NGLOB,myrank, &
+                                        .true.,add_central_cube)
+            ! debug
+            if( work_test_flag(iglob) < 0 ) then
+              if( IREGION == IREGION_INNER_CORE .and. INCLUDE_CENTRAL_CUBE ) then
+                ! we might have missed an interface point on an edge, just re-set to missing value
+                print*,'warning face flag:',myrank,'ispec=',ispec,'rank=',rank
+                print*,'  flag=',work_test_flag(iglob),'iface jk=',iface,j,k,'missed iglob=',iglob
+                !work_test_flag(iglob) = 0
+              else
+                print*,'error face flag:',myrank,'ispec=',ispec,'rank=',rank
+                print*,'  flag=',work_test_flag(iglob),'iface jk=',iface,j,k,'iglob=',iglob
+                call exit_mpi(myrank,'error face flag')
+              endif
+            endif
+
+          enddo
+        enddo
+      endif
+    enddo ! iface
+
+    ! 2. finds neighbours which share a single edge with this process
+    ! note: by now, faces have subtracted their neighbours, so an edge can hold at most one more process
+
+    ! loops over all edges of element
+    do iedge = 1, 12
+
+      ! chooses a point inside edge but not corner
+      select case( iedge )
+      case( 1 )
+        ! face I == 1, J == 1
+        iglob = ibool(1,1,MID,ispec)
+      case( 2 )
+        ! face I == 1, J == NGLLY
+        iglob = ibool(1,NGLLY,MID,ispec)
+      case( 3 )
+        ! face I == 1, K == 1
+        iglob = ibool(1,MID,1,ispec)
+      case( 4 )
+        ! face I == 1, K == NGLLZ
+        iglob = ibool(1,MID,NGLLZ,ispec)
+      case( 5 )
+        ! face I == NGLLX, J == 1
+        iglob = ibool(NGLLX,1,MID,ispec)
+      case( 6 )
+        ! face I == NGLLX, J == NGLLY
+        iglob = ibool(NGLLX,NGLLY,MID,ispec)
+      case( 7 )
+        ! face I == NGLLX, K == 1
+        iglob = ibool(NGLLX,MID,1,ispec)
+      case( 8 )
+        ! face I == NGLLX, K == NGLLZ
+        iglob = ibool(NGLLX,MID,NGLLZ,ispec)
+      case( 9 )
+        ! face J == 1, K == 1
+        iglob = ibool(MID,1,1,ispec)
+      case( 10 )
+        ! face J == 1, K == NGLLZ
+        iglob = ibool(MID,1,NGLLZ,ispec)
+      case( 11 )
+        ! face J == NGLLY, K == 1
+        iglob = ibool(MID,NGLLY,1,ispec)
+      case( 12 )
+        ! face J == NGLLY, K == NGLLZ
+        iglob = ibool(MID,NGLLY,NGLLZ,ispec)
+      end select
+
+      ! checks assembled flag on global point
+      if( work_test_flag(iglob) > 0 ) then
+        ispec_is_outer = .true.
+
+        ! rank of neighbor process
+        rank = work_test_flag(iglob) - 1
+
+        ! checks ranks range
+        if( rank < 0 .or. rank >= NPROCTOT ) then
+          print*,'error edge rank: ',myrank
+          print*,'  neighbor rank = ',rank,'exceeds total nproc:',NPROCTOT
+          print*,'  edge ',iedge
+          call exit_mpi(myrank,'error edge neighbor mpi rank')
+        endif
+
+        ! checks if already stored
+        icurrent = 0
+        is_done = .false.
+        do ii = 1,iinterface
+          if( rank == my_neighbours(ii) ) then
+            icurrent = ii
+            is_done = .true.
+            exit
+          endif
+        enddo
+
+        ! updates interfaces array
+        if( .not. is_done ) then
+          iinterface = iinterface + 1
+          if( iinterface > MAX_NEIGHBOURS ) &
+            call exit_mpi(myrank,'interface edge exceeds MAX_NEIGHBOURS range')
+          ! adds as neighbor new interface
+          my_neighbours(iinterface) = rank
+          icurrent = iinterface
+        endif
+        if( icurrent == 0 ) &
+          call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
+
+        ! adds interface points and removes neighbor flag from edge
+        ! assumes NGLLX == NGLLY == NGLLZ
+        do k = 1,NGLLX
+          select case( iedge )
+          case( 1 )
+            ! face I == 1, J == 1
+            iglob = ibool(1,1,k,ispec)
+          case( 2 )
+            ! face I == 1, J == NGLLY
+            iglob = ibool(1,NGLLY,k,ispec)
+          case( 3 )
+            ! face I == 1, K == 1
+            iglob = ibool(1,k,1,ispec)
+          case( 4 )
+            ! face I == 1, K == NGLLZ
+            iglob = ibool(1,k,NGLLZ,ispec)
+          case( 5 )
+            ! face I == NGLLX, J == 1
+            iglob = ibool(NGLLX,1,k,ispec)
+          case( 6 )
+            ! face I == NGLLX, J == NGLLY
+            iglob = ibool(NGLLX,NGLLY,k,ispec)
+          case( 7 )
+            ! face I == NGLLX, K == 1
+            iglob = ibool(NGLLX,k,1,ispec)
+          case( 8 )
+            ! face I == NGLLX, K == NGLLZ
+            iglob = ibool(NGLLX,k,NGLLZ,ispec)
+          case( 9 )
+            ! face J == 1, K == 1
+            iglob = ibool(k,1,1,ispec)
+          case( 10 )
+            ! face J == 1, K == NGLLZ
+            iglob = ibool(k,1,NGLLZ,ispec)
+          case( 11 )
+            ! face J == NGLLY, K == 1
+            iglob = ibool(k,NGLLY,1,ispec)
+          case( 12 )
+            ! face J == NGLLY, K == NGLLZ
+            iglob = ibool(k,NGLLY,NGLLZ,ispec)
+          end select
+
+          ! checks that we take each global point (on edges and corners) only once
+          call add_interface_point(iglob,rank,icurrent, &
+                                        nibool_neighbours,MAX_NEIGHBOURS, &
+                                        ibool_neighbours,max_nibool, &
+                                        work_test_flag,NGLOB,myrank, &
+                                        .true.,add_central_cube)
+
+          ! debug
+          if( work_test_flag(iglob) < 0 ) then
+            if( IREGION == IREGION_INNER_CORE .and. INCLUDE_CENTRAL_CUBE ) then
+              ! we might have missed an interface point on an edge, just re-set to missing value
+              print*,'warning edge flag:',myrank,'ispec=',ispec,'rank=',rank
+              print*,'  flag=',work_test_flag(iglob),'iedge k=',iedge,k,'missed iglob=',iglob
+              !work_test_flag(iglob) = 0
+            else
+              print*,'error edge flag:',myrank,'ispec=',ispec,'rank=',rank
+              print*,'  flag=',work_test_flag(iglob),'iedge k=',iedge,k,'iglob=',iglob
+              call exit_mpi(myrank,'error edge flag')
+            endif
+          endif
+
+        enddo
+      endif
+    enddo ! iedge
+
+
+    ! 3. finds neighbours which share a single corner with this process
+    ! note: faces and edges have subtracted their neighbours, so at most one more process can remain
+
+    ! loops over all corners of element
+    do icorner = 1, 8
+
+      ! chooses a corner point
+      select case( icorner )
+      case( 1 )
+        ! corner I == 1, J == 1, K == 1
+        iglob = ibool(1,1,1,ispec)
+      case( 2 )
+        ! corner I == 1, J == NGLLY, K == 1
+        iglob = ibool(1,NGLLY,1,ispec)
+      case( 3 )
+        ! corner I == 1, J == 1, K == NGLLZ
+        iglob = ibool(1,1,NGLLZ,ispec)
+      case( 4 )
+        ! corner I == 1, J == NGLLY, K == NGLLZ
+        iglob = ibool(1,NGLLY,NGLLZ,ispec)
+      case( 5 )
+        ! corner I == NGLLX, J == 1, K == 1
+        iglob = ibool(NGLLX,1,1,ispec)
+      case( 6 )
+        ! corner I == NGLLX, J == NGLLY, K == 1
+        iglob = ibool(NGLLX,NGLLY,1,ispec)
+      case( 7 )
+        ! corner I == NGLLX, J == 1, K == NGLLZ
+        iglob = ibool(NGLLX,1,NGLLZ,ispec)
+      case( 8 )
+        ! corner I == NGLLX, J == NGLLY, K == NGLLZ
+        iglob = ibool(NGLLX,NGLLY,NGLLZ,ispec)
+      end select
+
+      ! makes sure that all elements on mpi interfaces are included
+      ! uses original test_flag array, since the working copy reduces values
+      ! note: there can be elements which have an edge or corner shared with
+      !          other mpi partitions, but have the work_test_flag value already set to zero
+      !          since the iglob point was found before.
+      !          also, this check alone would suffice to determine the outer flag, but we keep the
+      !          equivalent checks above as well
+      if( test_flag(iglob) > 0.5 ) then
+        ispec_is_outer = .true.
+      endif
+
+      ! checks assembled flag on global point
+      if( work_test_flag(iglob) > 0 ) then
+        ispec_is_outer = .true.
+
+        ! rank of neighbor process
+        rank = work_test_flag(iglob) - 1
+
+        ! checks ranks range
+        if( rank < 0 .or. rank >= NPROCTOT ) then
+          print*,'error corner: ',myrank
+          print*,'  neighbor rank = ',rank,'exceeds total nproc:',NPROCTOT
+          print*,'  corner ',icorner
+          call exit_mpi(myrank,'error corner neighbor mpi rank')
+        endif
+
+        ! checks if already stored
+        icurrent = 0
+        is_done = .false.
+        do ii = 1,iinterface
+          if( rank == my_neighbours(ii) ) then
+            icurrent = ii
+            is_done = .true.
+            exit
+          endif
+        enddo
+
+        ! updates interfaces array
+        if( .not. is_done ) then
+          iinterface = iinterface + 1
+          if( iinterface > MAX_NEIGHBOURS ) &
+            call exit_mpi(myrank,'interface corner exceeds MAX_NEIGHBOURS range')
+          ! adds as neighbor new interface
+          my_neighbours(iinterface) = rank
+          icurrent = iinterface
+        endif
+        if( icurrent == 0 ) &
+          call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
+
+        ! adds this corner as interface point and removes neighbor flag from face,
+        ! checks that we take each global point (on edges and corners) only once
+        call add_interface_point(iglob,rank,icurrent, &
+                                    nibool_neighbours,MAX_NEIGHBOURS, &
+                                    ibool_neighbours,max_nibool, &
+                                    work_test_flag,NGLOB,myrank, &
+                                    .false.,add_central_cube)
+
+        ! debug
+        if( work_test_flag(iglob) < 0 ) call exit_mpi(myrank,'error corner flag')
+
+      endif
+
+    enddo ! icorner
+
+    ! stores flags for outer elements when recognized as such
+    ! (inner/outer elements separated for non-blocking mpi communications)
+    if( ispec_is_outer ) then
+      work_ispec_is_outer(ispec) = .true.
+    endif
+
+  enddo
+
+  ! number of outer elements (on MPI interfaces)
+  npoin = count( work_ispec_is_outer )
+
+  ! debug: user output
+  if( add_central_cube ) then
+    print*, 'rank',myrank,'interfaces : ',iinterface
+    do j=1,iinterface
+      print*, '  my_neighbours: ',my_neighbours(j),nibool_neighbours(j)
+    enddo
+    print*, '  test flag min/max: ',minval(work_test_flag),maxval(work_test_flag)
+    print*, '  outer elements: ',npoin
+    print*
+  endif
+
+  ! checks if all points were recognized
+  if( minval(work_test_flag) < 0 .or. maxval(work_test_flag) > 0 ) then
+    print*,'error mpi interface rank: ',myrank
+    print*,'  work_test_flag min/max :',minval(work_test_flag),maxval(work_test_flag)
+    call exit_mpi(myrank,'error: mpi points remain unrecognized, please check mesh interfaces')
+  endif
+
+  ! sets interfaces infos
+  num_interfaces = iinterface
+  max_nibool_interfaces = maxval( nibool_neighbours(1:num_interfaces) )
+
+  ! checks if unique set of neighbours
+  do ii = 1,num_interfaces-1
+    rank = my_neighbours(ii)
+    do j = ii+1,num_interfaces
+      if( rank == my_neighbours(j) ) then
+        print*,'test MPI: rank ',myrank,'my_neighbours:',rank,my_neighbours(j),'interfaces:',ii,j
+        call exit_mpi(myrank,'error test my_neighbours not unique')
+      endif
+    enddo
+  enddo
+
+  ! sorts buffers obtained to be conforming with neighbors in other slices
+  do iinterface = 1,num_interfaces
+    ! sorts ibool values in increasing order
+    ! used to check if we have duplicates in array
+    npoin = nibool_neighbours(iinterface)
+    call heap_sort( npoin, ibool_neighbours(1:npoin,iinterface) )
+
+    ! checks if unique set of iglob values
+    do j=1,npoin-1
+      if( ibool_neighbours(j,iinterface) == ibool_neighbours(j+1,iinterface) ) then
+        if( IREGION == IREGION_INNER_CORE .and. INCLUDE_CENTRAL_CUBE ) then
+          ! missing points might have been counted more than once
+          if( ibool_neighbours(j,iinterface) > 0 ) then
+            print*,'warning mpi interface rank:',myrank
+            print*,'  interface: ',my_neighbours(iinterface),'point: ',j,'of',npoin,'iglob=',ibool_neighbours(j,iinterface)
+            ! decrease number of points
+            nibool_neighbours(iinterface) = nibool_neighbours(iinterface) - 1
+            if( nibool_neighbours(iinterface) <= 0 ) then
+              print*,'error zero mpi interface rank:',myrank,'interface=',my_neighbours(iinterface)
+              call exit_mpi(myrank,'error: zero mpi points on interface')
+            endif
+            ! shift values
+            do k = j+1,npoin-1
+              ii = ibool_neighbours(k+1,iinterface)
+              ibool_neighbours(k,iinterface) = ii
+            enddo
+            ! re-sets values
+            ibool_neighbours(npoin,iinterface) = 0
+            npoin = nibool_neighbours(iinterface)
+            max_nibool_interfaces = maxval( nibool_neighbours(1:num_interfaces) )
+          endif
+        else
+          print*,'error mpi interface rank:',myrank
+          print*,'  interface: ',my_neighbours(iinterface),'point: ',j,'of',npoin,'iglob=',ibool_neighbours(j,iinterface)
+          call exit_mpi(myrank,'error: mpi points not unique on interface')
+        endif
+      endif
+    enddo
+
+    ! sort buffer obtained to be conforming with neighbor in other chunk
+    npoin = nibool_neighbours(iinterface)
+    call sort_MPI_interface(myrank,npoin,ibool_neighbours(1:npoin,iinterface), &
+                                NGLOB,xstore,ystore,zstore)
+
+  enddo
+
+  ! re-sets flags for outer elements
+  is_on_a_slice_edge(:) = work_ispec_is_outer(:)
+
+  end subroutine get_MPI_interfaces
+  
+!
+!-------------------------------------------------------------------------------------------------
+!
+  
+  subroutine sort_MPI_interface(myrank,npoin,ibool_n, &
+                                    NGLOB,xstore,ystore,zstore)
+
+  use constants,only: CUSTOM_REAL
+  
+  implicit none
+  
+  integer,intent(in) :: myrank,npoin
+  integer,dimension(npoin),intent(inout) :: ibool_n
+
+  integer,intent(in) :: NGLOB
+  real(kind=CUSTOM_REAL), dimension(NGLOB),intent(in) :: xstore,ystore,zstore
+
+  ! local parameters
+  ! arrays for sorting routine
+  double precision, dimension(:), allocatable :: work
+  double precision, dimension(:), allocatable :: xstore_selected,ystore_selected,zstore_selected
+  integer, dimension(:), allocatable :: ibool_selected  
+  integer, dimension(:), allocatable :: ind,ninseg,iglob,locval,iwork
+  logical, dimension(:), allocatable :: ifseg
+  integer :: nglob_selected,i,ipoin,ier
+  
+  ! allocate arrays for buffers with maximum size
+  allocate(ibool_selected(npoin), &
+          xstore_selected(npoin), &
+          ystore_selected(npoin), &
+          zstore_selected(npoin), &
+          ind(npoin), &
+          ninseg(npoin), &
+          iglob(npoin), &
+          locval(npoin), &
+          ifseg(npoin), &
+          iwork(npoin), &
+          work(npoin),stat=ier)
+  if( ier /= 0 ) call exit_MPI(myrank,'error sort MPI interface: allocating temporary sorting arrays')
+  
+  ! sets up working arrays
+  do i=1,npoin
+    ipoin = ibool_n(i)
+    
+    ibool_selected(i) = ipoin
+    xstore_selected(i) = xstore(ipoin)
+    ystore_selected(i) = ystore(ipoin)
+    zstore_selected(i) = zstore(ipoin)
+  enddo
+  
+  ! sort buffer obtained to be conforming with neighbor in other chunk
+  ! sort on x, y and z, the other arrays will be swapped as well
+  call sort_array_coordinates(npoin,xstore_selected,ystore_selected,zstore_selected, &
+                             ibool_selected,iglob,locval,ifseg,nglob_selected, &
+                             ind,ninseg,iwork,work)
+
+  ! check that no duplicate has been detected
+  if(nglob_selected /= npoin) call exit_MPI(myrank,'error sort MPI interface: duplicates detected in buffer')
+
+  ! stores new ibool ordering
+  ibool_n(1:npoin) = ibool_selected(1:npoin)
+
+  ! frees array memory
+  deallocate(ibool_selected,xstore_selected,ystore_selected,zstore_selected, &
+            ind,ninseg,iglob,locval,ifseg,iwork,work)
+
+              
+  end subroutine sort_MPI_interface
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine add_interface_point(iglob,rank,icurrent, &
+                                    nibool_neighbours,MAX_NEIGHBOURS, &
+                                    ibool_neighbours,max_nibool, &
+                                    work_test_flag,NGLOB,myrank, &
+                                    is_face_edge,add_central_cube)
+
+
+  implicit none
+
+  integer,intent(in) :: iglob,rank,icurrent
+  integer,intent(in) :: myrank
+
+  integer,intent(in) :: MAX_NEIGHBOURS,max_nibool
+  integer, dimension(MAX_NEIGHBOURS),intent(inout) :: nibool_neighbours
+  integer, dimension(max_nibool,MAX_NEIGHBOURS),intent(inout) :: ibool_neighbours
+
+  integer,intent(in) :: NGLOB
+  integer,dimension(NGLOB) :: work_test_flag
+
+  logical,intent(in) :: is_face_edge,add_central_cube
+
+  ! local parameters
+  integer :: i
+  logical :: is_done
+
+  ! let's check and be sure for central cube
+  !if( work_test_flag(iglob) <= 0 ) cycle ! continues to next point
+
+  ! checks that we take each global point (on edges and corners) only once
+  is_done = .false.
+  do i=1,nibool_neighbours(icurrent)
+    if( ibool_neighbours(i,icurrent) == iglob ) then
+      is_done = .true.
+      exit
+    endif
+  enddo
+
+  ! checks if anything to do
+  if( is_done ) then
+    ! special handling for central cube: removes rank if already added in inner core
+    if( add_central_cube ) then
+      if( is_face_edge .and. work_test_flag(iglob) < (rank + 1) ) then
+        ! re-sets if we missed this rank number
+        work_test_flag(iglob) = work_test_flag(iglob) + (rank + 1)
+      endif
+      ! re-sets flag
+      work_test_flag(iglob) = work_test_flag(iglob) - ( rank + 1 )
+      if( is_face_edge .and. work_test_flag(iglob) < 0 ) then
+        ! re-sets to zero if we missed this rank number
+        if( work_test_flag(iglob) == - (rank + 1 ) ) work_test_flag(iglob) = 0
+      endif
+    endif
+    return
+  endif
+
+  ! checks if flag was set correctly
+  if( work_test_flag(iglob) <= 0 ) then
+    ! we might have missed an interface point on an edge, just re-set to missing value
+    print*,'warning ',myrank,' flag: missed rank=',rank
+    print*,'  flag=',work_test_flag(iglob),'missed iglob=',iglob,'interface=',icurrent
+    print*
+  endif
+  ! we might have missed an interface point on an edge, just re-set to missing value
+  if( is_face_edge ) then
+    if( work_test_flag(iglob) < (rank + 1) ) then
+      ! re-sets if we missed this rank number
+      work_test_flag(iglob) = work_test_flag(iglob) + (rank + 1)
+    endif
+  endif
+
+  ! adds point
+  ! increases number of total points on this interface
+  nibool_neighbours(icurrent) = nibool_neighbours(icurrent) + 1
+  if( nibool_neighbours(icurrent) > max_nibool) &
+      call exit_mpi(myrank,'interface face exceeds max_nibool range')
+
+  ! stores interface iglob index
+  ibool_neighbours( nibool_neighbours(icurrent),icurrent ) = iglob
+
+  ! re-sets flag
+  work_test_flag(iglob) = work_test_flag(iglob) - ( rank + 1 )
+
+  ! checks
+  if( is_face_edge .and. work_test_flag(iglob) < 0 ) then
+    ! re-sets to zero if we missed this rank number
+    if( work_test_flag(iglob) == - (rank + 1 ) ) work_test_flag(iglob) = 0
+  endif
+
+  end subroutine add_interface_point
+
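The bookkeeping in get_MPI_interfaces/add_interface_point relies on test_flag carrying (neighbour rank + 1) on shared global points: the rank is recovered as flag - 1, and (rank + 1) is subtracted once the point has been assigned to an interface, so a final flag of zero everywhere means every shared point was claimed exactly once. A minimal sketch of that arithmetic (array contents are made up for illustration):

  program flag_arithmetic_demo
    implicit none
    integer, parameter :: NGLOB = 4
    integer :: work_test_flag(NGLOB)
    integer :: iglob,rank

    ! points 2 and 3 are shared with a hypothetical neighbour rank 6, stored as rank + 1
    work_test_flag = (/ 0, 7, 7, 0 /)

    do iglob = 1,NGLOB
      if( work_test_flag(iglob) > 0 ) then
        ! decode the neighbour rank
        rank = work_test_flag(iglob) - 1
        ! consume the point so it cannot be counted twice
        work_test_flag(iglob) = work_test_flag(iglob) - (rank + 1)
        print *,'point',iglob,'belongs to interface with rank',rank
      endif
    enddo

    ! zero everywhere means all shared points were recognized exactly once
    print *,'remaining flags min/max = ',minval(work_test_flag),maxval(work_test_flag)
  end program flag_arithmetic_demo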

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/heap_sort.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/heap_sort.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/heap_sort.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,98 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+  subroutine heap_sort( N, array )
+
+! heap sort algorithm
+! sorts an integer array in increasing order (e.g. 1, 5, 6, 9, 12, 13, 14, ...)
+
+  implicit none
+  integer,intent(in) :: N
+  integer,dimension(N),intent(inout) :: array
+
+  ! local parameters
+  integer :: tmp
+  integer :: i
+
+  ! checks if anything to do
+  if( N < 2 ) return
+
+  ! builds heap
+  do i = N/2, 1, -1
+    call heap_sort_siftdown(N,array,i,N)
+  enddo
+
+  ! sorts array
+  do i = N, 2, -1
+    ! swaps last and first entry in this section
+    tmp = array(1)
+    array(1) = array(i)
+    array(i) = tmp
+    call heap_sort_siftdown(N,array,1,i-1)
+  enddo
+
+  end subroutine heap_sort
+
+!
+!----
+!
+
+  subroutine heap_sort_siftdown(N,array,start,bottom)
+
+  implicit none
+
+  integer,intent(in):: N
+  integer,dimension(N),intent(inout) :: array
+  integer,intent(in) :: start,bottom
+
+  ! local parameters
+  integer :: i,j
+  integer :: tmp
+
+  i = start
+  tmp = array(i)
+  j = 2*i
+  do while( j <= bottom )
+    ! chooses larger value first in this section
+    if( j < bottom ) then
+      if( array(j) <= array(j+1) ) j = j + 1
+    endif
+
+    ! checks if section is already smaller than the initial value
+    if( array(j) < tmp ) exit
+
+    array(i) = array(j)
+    i = j
+    j = 2*i
+  enddo
+
+  array(i) = tmp
+  return
+
+  end subroutine heap_sort_siftdown
+
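A short usage sketch for the heap_sort routine above, assuming it is compiled and linked with the caller (the array values are arbitrary):

  program heap_sort_demo
    implicit none
    integer :: values(6)
    values = (/ 13, 1, 9, 6, 12, 5 /)
    ! sorts in place, in increasing order
    call heap_sort(6, values)
    print *,values   ! prints 1 5 6 9 12 13
  end program heap_sort_demo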

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -348,6 +348,9 @@
   ! creates meshes for regions crust/mantle, outer core and inner core
   call create_meshes()
 
+  ! sets up the MPI communication interfaces
+  call create_MPI_interfaces()
+  
   ! outputs mesh infos and saves new header file
   call finalize_mesher()
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D_models.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D_models.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D_models.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -25,18 +25,28 @@
 !
 !=====================================================================
 
+module constants
 
-  module meshfem3D_models_par
+  include "constants.h"
 
+end module constants
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+module meshfem3D_models_par
+
 !---
 !
 ! ADD YOUR MODEL HERE
 !
 !---
 
+  use constants
+  
   implicit none
 
-  include "constants.h"
 
 ! model_aniso_mantle_variables
   type model_aniso_mantle_variables
@@ -432,7 +442,7 @@
 ! to create a reference model based on 1D_REF but with 3D crust and 410/660 topography
   logical,parameter :: USE_1D_REFERENCE = .false.
 
-  end module meshfem3D_models_par
+end module meshfem3D_models_par
 
 
 !
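Wrapping constants.h in a module lets routines pull in only the parameters they need via use-only lists instead of including the whole header, as get_MPI_interfaces.f90 above already does. A hedged sketch of the resulting pattern (the routine name is hypothetical):

  subroutine example_uses_constants()
    ! import only what is needed from the new constants module
    use constants, only: NGLLX,NGLLY,NGLLZ,CUSTOM_REAL
    implicit none
    real(kind=CUSTOM_REAL) :: field(NGLLX,NGLLY,NGLLZ)
    field(:,:,:) = 0._CUSTOM_REAL
  end subroutine example_uses_constants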

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D_par.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D_par.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/meshfem3D_par.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -38,14 +38,6 @@
   ! correct number of spectral elements in each block depending on chunk type
   integer nspec_tiso,npointot
 
-  ! parameters needed to store the radii of the grid points
-  ! in the spherically symmetric Earth
-  integer, dimension(:), allocatable :: idoubling
-  integer, dimension(:,:,:,:), allocatable :: ibool
-
-  ! arrays with the mesh in double precision
-  double precision, dimension(:,:,:,:), allocatable :: xstore,ystore,zstore
-
   ! proc numbers for MPI
   integer myrank,sizeprocs
 
@@ -54,8 +46,6 @@
   double precision area_local_top
   double precision volume_local,volume_total
 
-  !integer iprocnum
-
   ! for loop on all the slices
   integer iregion_code
   integer iproc_xi,iproc_eta,ichunk

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/read_arrays_buffers_mesher.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/read_arrays_buffers_mesher.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/read_arrays_buffers_mesher.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,337 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine read_arrays_buffers_mesher(iregion_code,myrank, &
+                               iboolleft_xi,iboolright_xi,iboolleft_eta,iboolright_eta, &
+                               npoin2D_xi,npoin2D_eta, &
+                               iprocfrom_faces,iprocto_faces,imsg_type, &
+                               iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+                               iboolfaces,npoin2D_faces,iboolcorner, &
+                               NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NGLOB1D_RADIAL, &
+                               NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT,NPROC_XI,NPROC_ETA,LOCAL_PATH,NCHUNKS)
+
+  implicit none
+
+! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+
+  integer iregion_code,myrank,NCHUNKS,ier
+
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi,npoin2D_eta
+  integer NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NGLOB1D_RADIAL
+  integer NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT,NPROC_XI,NPROC_ETA
+
+  integer npoin2D_faces(NUMFACES_SHARED)
+
+  character(len=150) LOCAL_PATH
+
+  integer, dimension(NGLOB2DMAX_XY,NUMFACES_SHARED) :: iboolfaces
+  integer, dimension(NGLOB1D_RADIAL,NUMCORNERS_SHARED) :: iboolcorner
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX) :: iboolleft_xi,iboolright_xi
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX) :: iboolleft_eta,iboolright_eta
+
+  integer, dimension(NUMMSGS_FACES) :: iprocfrom_faces,iprocto_faces,imsg_type
+
+! allocate array for messages for corners
+  integer, dimension(NCORNERSCHUNKS) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+  integer npoin2D_xi_mesher,npoin2D_eta_mesher
+  integer npoin1D_corner
+
+  integer imsg,icount_faces,icount_corners
+  integer ipoin1D,ipoin2D
+
+  double precision xdummy,ydummy,zdummy
+
+! processor identification
+  character(len=150) OUTPUT_FILES,prname,filename
+
+! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+! get the base pathname for output files
+  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', 'OUTPUT_FILES')
+
+! create the name for the database of the current slice and region
+  call create_name_database(prname,myrank,iregion_code,LOCAL_PATH)
+
+! read 2-D addressing for summation between slices along xi with MPI
+
+! read iboolleft_xi of this slice
+  open(unit=IIN,file=prname(1:len_trim(prname))//'iboolleft_xi.txt', &
+        status='old',action='read',iostat=ier)
+  if( ier /= 0 ) call exit_MPI(myrank,'error opening iboolleft_xi file')
+
+  npoin2D_xi(1) = 1
+ 350  continue
+  read(IIN,*) iboolleft_xi(npoin2D_xi(1)),xdummy,ydummy,zdummy
+  if(iboolleft_xi(npoin2D_xi(1)) > 0) then
+      npoin2D_xi(1) = npoin2D_xi(1) + 1
+      goto 350
+  endif
+! subtract the line that contains the flag after the last point
+  npoin2D_xi(1) = npoin2D_xi(1) - 1
+! read nb of points given by the mesher
+  read(IIN,*) npoin2D_xi_mesher
+  if(npoin2D_xi(1) > NGLOB2DMAX_XMIN_XMAX .or. npoin2D_xi(1) /= npoin2D_xi_mesher) &
+      call exit_MPI(myrank,'incorrect iboolleft_xi read')
+  close(IIN)
+
+! read iboolright_xi of this slice
+  open(unit=IIN,file=prname(1:len_trim(prname))//'iboolright_xi.txt', &
+        status='old',action='read',iostat=ier)
+  if( ier /= 0 ) call exit_MPI(myrank,'error opening iboolright_xi file')
+
+  npoin2D_xi(2) = 1
+ 360  continue
+  read(IIN,*) iboolright_xi(npoin2D_xi(2)),xdummy,ydummy,zdummy
+  if(iboolright_xi(npoin2D_xi(2)) > 0) then
+      npoin2D_xi(2) = npoin2D_xi(2) + 1
+      goto 360
+  endif
+! subtract the line that contains the flag after the last point
+  npoin2D_xi(2) = npoin2D_xi(2) - 1
+! read nb of points given by the mesher
+  read(IIN,*) npoin2D_xi_mesher
+  if(npoin2D_xi(2) > NGLOB2DMAX_XMIN_XMAX .or. npoin2D_xi(2) /= npoin2D_xi_mesher) &
+      call exit_MPI(myrank,'incorrect iboolright_xi read')
+  close(IIN)
+
+  if(myrank == 0) then
+    write(IMAIN,*) '  #max of points in MPI buffers along xi npoin2D_xi = ', &
+                                maxval(npoin2D_xi(:))
+    write(IMAIN,*) '  #max of array elements transferred npoin2D_xi*NDIM = ', &
+                                maxval(npoin2D_xi(:))*NDIM
+    write(IMAIN,*)
+  endif
+
+! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+! read 2-D addressing for summation between slices along eta with MPI
+
+! read iboolleft_eta of this slice
+  open(unit=IIN,file=prname(1:len_trim(prname))//'iboolleft_eta.txt', &
+        status='old',action='read',iostat=ier)
+  if( ier /= 0 ) call exit_MPI(myrank,'error opening iboolleft_eta file')
+
+  npoin2D_eta(1) = 1
+ 370  continue
+  read(IIN,*) iboolleft_eta(npoin2D_eta(1)),xdummy,ydummy,zdummy
+  if(iboolleft_eta(npoin2D_eta(1)) > 0) then
+      npoin2D_eta(1) = npoin2D_eta(1) + 1
+      goto 370
+  endif
+! subtract the line that contains the flag after the last point
+  npoin2D_eta(1) = npoin2D_eta(1) - 1
+! read nb of points given by the mesher
+  read(IIN,*) npoin2D_eta_mesher
+  if(npoin2D_eta(1) > NGLOB2DMAX_YMIN_YMAX .or. npoin2D_eta(1) /= npoin2D_eta_mesher) &
+      call exit_MPI(myrank,'incorrect iboolleft_eta read')
+  close(IIN)
+
+! read iboolright_eta of this slice
+  open(unit=IIN,file=prname(1:len_trim(prname))//'iboolright_eta.txt', &
+        status='old',action='read',iostat=ier)
+  if( ier /= 0 ) call exit_MPI(myrank,'error opening iboolright_eta file')
+
+  npoin2D_eta(2) = 1
+ 380  continue
+  read(IIN,*) iboolright_eta(npoin2D_eta(2)),xdummy,ydummy,zdummy
+  if(iboolright_eta(npoin2D_eta(2)) > 0) then
+      npoin2D_eta(2) = npoin2D_eta(2) + 1
+      goto 380
+  endif
+! subtract the line that contains the flag after the last point
+  npoin2D_eta(2) = npoin2D_eta(2) - 1
+! read nb of points given by the mesher
+  read(IIN,*) npoin2D_eta_mesher
+  if(npoin2D_eta(2) > NGLOB2DMAX_YMIN_YMAX .or. npoin2D_eta(2) /= npoin2D_eta_mesher) &
+      call exit_MPI(myrank,'incorrect iboolright_eta read')
+  close(IIN)
+
+  if(myrank == 0) then
+    write(IMAIN,*) '  #max of points in MPI buffers along eta npoin2D_eta = ', &
+                                maxval(npoin2D_eta(:))
+    write(IMAIN,*) '  #max of array elements transferred npoin2D_eta*NDIM = ', &
+                                maxval(npoin2D_eta(:))*NDIM
+    write(IMAIN,*)
+  endif
+
+
+!! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+! read chunk messages only if more than one chunk
+  if(NCHUNKS /= 1) then
+
+! read messages to assemble between chunks with MPI
+
+  if(myrank == 0) then
+
+    ! file with the list of processors for each message for faces
+    open(unit=IIN,file=trim(OUTPUT_FILES)//'/list_messages_faces.txt', &
+          status='old',action='read',iostat=ier)
+    if( ier /= 0 ) call exit_MPI(myrank,'error opening list_messages_faces file')
+
+    do imsg = 1,NUMMSGS_FACES
+      read(IIN,*) imsg_type(imsg),iprocfrom_faces(imsg),iprocto_faces(imsg)
+      if      (iprocfrom_faces(imsg) < 0 &
+          .or. iprocto_faces(imsg) < 0 &
+          .or. iprocfrom_faces(imsg) > NPROCTOT-1 &
+          .or. iprocto_faces(imsg) > NPROCTOT-1) &
+        call exit_MPI(myrank,'incorrect chunk faces numbering')
+      if (imsg_type(imsg) < 1 .or. imsg_type(imsg) > 3) &
+        call exit_MPI(myrank,'incorrect message type labeling')
+    enddo
+    close(IIN)
+
+    ! file with the list of processors for each message for corners
+    open(unit=IIN,file=trim(OUTPUT_FILES)//'/list_messages_corners.txt', &
+          status='old',action='read',iostat=ier)
+    if( ier /= 0 ) call exit_MPI(myrank,'error opening list_messages_corners file')
+
+    do imsg = 1,NCORNERSCHUNKS
+      read(IIN,*) iproc_master_corners(imsg),iproc_worker1_corners(imsg), &
+                            iproc_worker2_corners(imsg)
+      if    (iproc_master_corners(imsg) < 0 &
+        .or. iproc_worker1_corners(imsg) < 0 &
+        .or. iproc_worker2_corners(imsg) < 0 &
+        .or. iproc_master_corners(imsg) > NPROCTOT-1 &
+        .or. iproc_worker1_corners(imsg) > NPROCTOT-1 &
+        .or. iproc_worker2_corners(imsg) > NPROCTOT-1) &
+        call exit_MPI(myrank,'incorrect chunk corner numbering')
+    enddo
+    close(IIN)
+
+  endif
+
+! broadcast the information read on the master to the nodes
+  call MPI_BCAST(imsg_type,NUMMSGS_FACES,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+  call MPI_BCAST(iprocfrom_faces,NUMMSGS_FACES,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+  call MPI_BCAST(iprocto_faces,NUMMSGS_FACES,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+
+  call MPI_BCAST(iproc_master_corners,NCORNERSCHUNKS,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+  call MPI_BCAST(iproc_worker1_corners,NCORNERSCHUNKS,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+  call MPI_BCAST(iproc_worker2_corners,NCORNERSCHUNKS,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+  if( ier /= 0 ) call exit_MPI(myrank,'error mpi broadcast')
+
+
+!---- read indirect addressing for each message for faces of the chunks
+!---- a given slice can belong to at most two faces
+  icount_faces = 0
+  do imsg = 1,NUMMSGS_FACES
+    if(myrank == iprocfrom_faces(imsg) .or. myrank == iprocto_faces(imsg)) then
+      icount_faces = icount_faces + 1
+
+      if(icount_faces > NUMFACES_SHARED) then
+        print*,'error ',myrank,' icount_faces: ',icount_faces,'NUMFACES_SHARED:',NUMFACES_SHARED
+        print*,'iregion_code:',iregion_code
+        call exit_MPI(myrank,'more than NUMFACES_SHARED faces for this slice')
+      endif
+      if(icount_faces > 2 .and. (NPROC_XI > 1 .or. NPROC_ETA > 1)) then
+        print*,'error ',myrank,' icount_faces: ',icount_faces,'NPROC_XI:',NPROC_XI,'NPROC_ETA:',NPROC_ETA
+        print*,'iregion_code:',iregion_code
+        call exit_MPI(myrank,'more than two faces for this slice')
+      endif
+
+      ! read file with 2D buffer for faces
+      if(myrank == iprocfrom_faces(imsg)) then
+        write(filename,"('buffer_faces_chunks_sender_msg',i6.6,'.txt')") imsg
+      else if(myrank == iprocto_faces(imsg)) then
+        write(filename,"('buffer_faces_chunks_receiver_msg',i6.6,'.txt')") imsg
+      endif
+
+      open(unit=IIN,file=prname(1:len_trim(prname))//filename,status='old',action='read',iostat=ier)
+      if( ier /= 0 ) call exit_MPI(myrank,'error opening buffer_faces file')
+
+      read(IIN,*) npoin2D_faces(icount_faces)
+      if(npoin2D_faces(icount_faces) > NGLOB2DMAX_XY) then
+        print*,'error ',myrank,' npoin2D_faces: ',npoin2D_faces(icount_faces),icount_faces
+        print*,'iregion_code:',iregion_code
+        call exit_MPI(myrank,'incorrect nb of points in face buffer')
+      endif
+
+      do ipoin2D = 1,npoin2D_faces(icount_faces)
+        read(IIN,*) iboolfaces(ipoin2D,icount_faces),xdummy,ydummy,zdummy
+      enddo
+      close(IIN)
+
+    endif
+  enddo
+
+
+!---- read indirect addressing for each message for corners of the chunks
+!---- a given slice can belong to at most one corner
+  icount_corners = 0
+  do imsg = 1,NCORNERSCHUNKS
+    ! if only two chunks then there is no second worker
+    if(myrank == iproc_master_corners(imsg) .or. &
+         myrank == iproc_worker1_corners(imsg) .or. &
+         (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) then
+
+      icount_corners = icount_corners + 1
+      if(icount_corners>1 .and. (NPROC_XI > 1 .or. NPROC_ETA > 1)) then
+        print*,'error ',myrank,'icount_corners:',icount_corners
+        print*,'iregion_code:',iregion_code
+        call exit_MPI(myrank,'more than one corner for this slice')
+      endif
+      if(icount_corners>4) call exit_MPI(myrank,'more than four corners for this slice')
+
+      ! read file with 1D buffer for corner
+      if(myrank == iproc_master_corners(imsg)) then
+        write(filename,"('buffer_corners_chunks_master_msg',i6.6,'.txt')") imsg
+      else if(myrank == iproc_worker1_corners(imsg)) then
+        write(filename,"('buffer_corners_chunks_worker1_msg',i6.6,'.txt')") imsg
+      else if( NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg)) then
+        write(filename,"('buffer_corners_chunks_worker2_msg',i6.6,'.txt')") imsg
+      endif
+
+      ! matching codes
+      open(unit=IIN,file=prname(1:len_trim(prname))//filename, &
+            status='old',action='read',iostat=ier)
+      if( ier /= 0 ) call exit_MPI(myrank,'error opening buffer_corners_chunks file')
+
+      read(IIN,*) npoin1D_corner
+      if(npoin1D_corner /= NGLOB1D_RADIAL) then
+        print*,'error ',myrank,' npoin1D_corner: ',npoin1D_corner,'NGLOB1D_RADIAL:',NGLOB1D_RADIAL
+        print*,'iregion_code:',iregion_code
+        call exit_MPI(myrank,'incorrect nb of points in corner buffer')
+      endif
+      do ipoin1D = 1,npoin1D_corner
+        read(IIN,*) iboolcorner(ipoin1D,icount_corners),xdummy,ydummy,zdummy
+      enddo
+      close(IIN)
+
+    endif
+
+
+  enddo
+
+  endif
+
+  end subroutine read_arrays_buffers_mesher
+
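The iboolleft/iboolright readers above expect a sentinel line (a non-positive ibool value) after the last point, followed by the point count written by the mesher. A minimal, self-contained sketch of the same read pattern written with exit instead of goto (unit number, array size and values are made up):

  program sentinel_read_demo
    implicit none
    integer, parameter :: iunit = 20
    integer :: buffer(10)
    integer :: ibool_val,npoin,npoin_mesher
    real :: xdummy,ydummy,zdummy

    ! write a tiny buffer in the same layout: ibool x y z per point,
    ! then a sentinel line (ibool = 0), then the point count
    open(unit=iunit,status='scratch')
    write(iunit,*) 11, 0.1, 0.2, 0.3
    write(iunit,*) 12, 0.4, 0.5, 0.6
    write(iunit,*) 0, 0.0, 0.0, 0.0
    write(iunit,*) 2
    rewind(iunit)

    ! read points until the sentinel, then check against the stored count
    npoin = 0
    do
      read(iunit,*) ibool_val,xdummy,ydummy,zdummy
      if( ibool_val <= 0 ) exit
      npoin = npoin + 1
      buffer(npoin) = ibool_val
    enddo
    read(iunit,*) npoin_mesher
    close(iunit)

    if( npoin /= npoin_mesher ) stop 'incorrect buffer read'
    print *,'read ',npoin,' points: ',buffer(1:npoin)
  end program sentinel_read_demo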

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_color_perm.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_color_perm.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_color_perm.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,255 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine setup_color_perm(myrank,iregion_code,nspec,nglob, &
+                              ibool,is_on_a_slice_edge,prname, &
+                              npoin2D_xi,npoin2D_eta)
+  
+  use constants
+  use meshfem3D_par,only: NSTEP,DT,NPROC_XI,NPROC_ETA
+  implicit none
+
+  ! standard include of the MPI library
+  include 'mpif.h'
+
+  integer :: myrank
+  integer :: iregion_code
+
+  integer :: nspec,nglob
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+  ! this for non blocking MPI
+  logical, dimension(nspec) :: is_on_a_slice_edge
+
+  ! name of the database file
+  character(len=150) :: prname
+
+  integer :: npoin2D_xi,npoin2D_eta
+  
+  ! local parameters
+  integer :: nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer
+  integer, dimension(:), allocatable :: perm
+  integer, dimension(:), allocatable :: first_elem_number_in_this_color
+  integer, dimension(:), allocatable :: num_of_elems_in_this_color
+
+  integer :: icolor,ispec_counter
+  integer :: nspec_outer_min_global,nspec_outer_max_global
+  integer :: ispec,ier
+
+  !!!! David Michea: detection of the edges, coloring and permutation separately
+  allocate(perm(nspec))
+
+  ! implement mesh coloring for GPUs if needed, to create subsets of disconnected elements
+  ! to remove dependencies and the need for atomic operations in the sum of elemental contributions in the solver
+  if(USE_MESH_COLORING_GPU) then
+
+    ! user output
+    if(myrank == 0 ) write(IMAIN,*) '  creating mesh coloring'
+
+    allocate(first_elem_number_in_this_color(MAX_NUMBER_OF_COLORS + 1))
+
+    call get_perm_color_faster(is_on_a_slice_edge,ibool,perm,nspec,nglob, &
+                              nb_colors_outer_elements,nb_colors_inner_elements,nspec_outer, &
+                              first_elem_number_in_this_color,myrank)
+
+    ! for the last color, the next color is fictitious and its first (fictitious) element number is nspec + 1
+    first_elem_number_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements + 1) = nspec + 1
+
+    allocate(num_of_elems_in_this_color(nb_colors_outer_elements + nb_colors_inner_elements))
+
+    ! save mesh coloring
+    open(unit=99,file=prname(1:len_trim(prname))//'num_of_elems_in_this_color.dat', &
+         status='unknown',iostat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error opening num_of_elems_in_this_color file')
+
+    ! number of colors for outer elements
+    write(99,*) nb_colors_outer_elements
+
+    ! number of colors for inner elements
+    write(99,*) nb_colors_inner_elements
+
+    ! number of elements in each color
+    do icolor = 1, nb_colors_outer_elements + nb_colors_inner_elements
+      num_of_elems_in_this_color(icolor) = first_elem_number_in_this_color(icolor+1) &
+                                          - first_elem_number_in_this_color(icolor)
+      write(99,*) num_of_elems_in_this_color(icolor)
+    enddo
+    close(99)
+
+    ! check that the sum of all the numbers of elements found in each color is equal
+    ! to the total number of elements in the mesh
+    if(sum(num_of_elems_in_this_color) /= nspec) then
+      print *,'nspec = ',nspec
+      print *,'total number of elements in all the colors of the mesh = ',sum(num_of_elems_in_this_color)
+      call exit_mpi(myrank,'incorrect total number of elements in all the colors of the mesh')
+    endif
+
+    ! check that the sum of all the numbers of elements found in each color for the outer elements is equal
+    ! to the total number of outer elements found in the mesh
+    if(sum(num_of_elems_in_this_color(1:nb_colors_outer_elements)) /= nspec_outer) then
+      print *,'nspec_outer = ',nspec_outer
+      print *,'total number of elements in all the colors of the mesh for outer elements = ', &
+        sum(num_of_elems_in_this_color(1:nb_colors_outer_elements))
+      call exit_mpi(myrank,'incorrect total number of elements in all the colors of the mesh for outer elements')
+    endif
+
+    call MPI_ALLREDUCE(nspec_outer,nspec_outer_min_global,1,MPI_INTEGER,MPI_MIN,MPI_COMM_WORLD,ier)
+    call MPI_ALLREDUCE(nspec_outer,nspec_outer_max_global,1,MPI_INTEGER,MPI_MAX,MPI_COMM_WORLD,ier)
+
+    deallocate(first_elem_number_in_this_color)
+    deallocate(num_of_elems_in_this_color)
+
+  else
+
+    !! DK DK for regular C + MPI version for CPUs: do not use colors but nonetheless put all the outer elements
+    !! DK DK first in order to be able to overlap non-blocking MPI communications with calculations
+
+    !! DK DK nov 2010, for Rosa Badia / StarSs:
+    !! no need for mesh coloring, but need to implement inner/outer subsets for non blocking MPI for StarSs
+    ispec_counter = 0
+    perm(:) = 0
+
+    ! first generate all the outer elements
+    do ispec = 1,nspec
+      if(is_on_a_slice_edge(ispec)) then
+        ispec_counter = ispec_counter + 1
+        perm(ispec) = ispec_counter
+      endif
+    enddo
+
+    ! make sure we have detected some outer elements
+    if(ispec_counter <= 0) stop 'fatal error: no outer elements detected!'
+
+    ! store total number of outer elements
+    nspec_outer = ispec_counter
+
+    ! then generate all the inner elements
+    do ispec = 1,nspec
+      if(.not. is_on_a_slice_edge(ispec)) then
+        ispec_counter = ispec_counter + 1
+        perm(ispec) = ispec_counter
+      endif
+    enddo
+
+    ! test that all the elements have been used once and only once
+    if(ispec_counter /= nspec) stop 'fatal error: ispec_counter not equal to nspec'
+
+    ! do basic checks
+    if(minval(perm) /= 1) stop 'minval(perm) should be 1'
+    if(maxval(perm) /= nspec) stop 'maxval(perm) should be nspec'
+
+    call MPI_ALLREDUCE(nspec_outer,nspec_outer_min_global,1,MPI_INTEGER,MPI_MIN,MPI_COMM_WORLD,ier)
+    call MPI_ALLREDUCE(nspec_outer,nspec_outer_max_global,1,MPI_INTEGER,MPI_MAX,MPI_COMM_WORLD,ier)
+
+  endif ! USE_MESH_COLORING_GPU
+
+  !! DK DK and Manh Ha, Nov 2011: added this to use the new mesher in the CUDA or C / StarSs test codes
+
+  if (myrank == 0 .and. iregion_code == IREGION_CRUST_MANTLE) then
+    ! write a header file for the Fortran version of the solver
+    open(unit=99,file=prname(1:len_trim(prname))//'values_from_mesher_f90.h', &
+          status='unknown',iostat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error opening file values_from_mesher_f90.h')
+    
+    write(99,*) 'integer, parameter :: NSPEC = ',nspec
+    write(99,*) 'integer, parameter :: NGLOB = ',nglob
+    !!! DK DK use 1000 time steps only for the scaling tests
+    write(99,*) 'integer, parameter :: NSTEP = 1000 !!!!!!!!!!! ',nstep
+    write(99,*) 'real(kind=4), parameter :: deltat = ',DT
+    write(99,*)
+    write(99,*) 'integer, parameter ::  NGLOB2DMAX_XMIN_XMAX = ',npoin2D_xi
+    write(99,*) 'integer, parameter ::  NGLOB2DMAX_YMIN_YMAX = ',npoin2D_eta
+    write(99,*) 'integer, parameter ::  NGLOB2DMAX_ALL = ',max(npoin2D_xi,npoin2D_eta)
+    write(99,*) 'integer, parameter ::  NPROC_XI = ',NPROC_XI
+    write(99,*) 'integer, parameter ::  NPROC_ETA = ',NPROC_ETA
+    write(99,*)
+    write(99,*) '! element number of the source and of the station'
+    write(99,*) '! after permutation of the elements by mesh coloring'
+    write(99,*) '! and inner/outer set splitting in the mesher'
+    write(99,*) 'integer, parameter :: NSPEC_SOURCE = ',perm(NSPEC/3)
+    write(99,*) 'integer, parameter :: RANK_SOURCE = 0'
+    write(99,*)
+    write(99,*) 'integer, parameter :: RANK_STATION = (NPROC_XI*NPROC_ETA - 1)'
+    write(99,*) 'integer, parameter :: NSPEC_STATION = ',perm(2*NSPEC/3)
+
+    ! save coordinates of the seismic source
+    !   write(99,*) xstore(2,2,2,10);
+    !   write(99,*) ystore(2,2,2,10);
+    !   write(99,*) zstore(2,2,2,10);
+
+    ! save coordinates of the seismic station
+    !   write(99,*) xstore(2,2,2,nspec-10);
+    !   write(99,*) ystore(2,2,2,nspec-10);
+    !   write(99,*) zstore(2,2,2,nspec-10);
+    close(99)
+
+    !! write a header file for the C version of the solver
+    open(unit=99,file=prname(1:len_trim(prname))//'values_from_mesher_C.h', &
+          status='unknown',iostat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error opening file values_from_mesher_C.h')
+    
+    write(99,*) '#define NSPEC ',nspec
+    write(99,*) '#define NGLOB ',nglob
+    !!    write(99,*) '#define NSTEP ',nstep
+    !!! DK DK use 1000 time steps only for the scaling tests
+    write(99,*) '// #define NSTEP ',nstep
+    write(99,*) '#define NSTEP 1000'
+    ! put an "f" at the end to force single precision
+    write(99,"('#define deltat ',e18.10,'f')") DT
+    write(99,*) '#define NGLOB2DMAX_XMIN_XMAX ',npoin2D_xi
+    write(99,*) '#define NGLOB2DMAX_YMIN_YMAX ',npoin2D_eta
+    write(99,*) '#define NGLOB2DMAX_ALL ',max(npoin2D_xi,npoin2D_eta)
+    write(99,*) '#define NPROC_XI ',NPROC_XI
+    write(99,*) '#define NPROC_ETA ',NPROC_ETA
+    write(99,*)
+    write(99,*) '// element and MPI slice number of the source and the station'
+    write(99,*) '// after permutation of the elements by mesh coloring'
+    write(99,*) '// and inner/outer set splitting in the mesher'
+    write(99,*) '#define RANK_SOURCE 0'
+    write(99,*) '#define NSPEC_SOURCE ',perm(NSPEC/3)
+    write(99,*)
+    write(99,*) '#define RANK_STATION (NPROC_XI*NPROC_ETA - 1)'
+    write(99,*) '#define NSPEC_STATION ',perm(2*NSPEC/3)
+    close(99)
+
+    open(unit=99,file=prname(1:len_trim(prname))//'values_from_mesher_nspec_outer.h', &
+          status='unknown',iostat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error opening values_from_mesher_nspec_outer.h file')
+    
+    write(99,*) '#define NSPEC_OUTER ',nspec_outer_max_global
+    write(99,*) '// NSPEC_OUTER_min = ',nspec_outer_min_global
+    write(99,*) '// NSPEC_OUTER_max = ',nspec_outer_max_global
+    close(99)
+
+  endif
+
+  !! DK DK and Manh Ha, Nov 2011: added this to use the new mesher in the CUDA or C / StarSs test codes
+
+  deallocate(perm)
+
+
+  end subroutine setup_color_perm
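
For reference, when USE_MESH_COLORING_GPU is off the permutation built above is a stable two-pass renumbering: elements flagged as lying on a slice edge ("outer") receive the lowest new indices so that their MPI buffers can be filled and sent first, and the remaining ("inner") elements fill the rest and can be worked on while the messages are in flight. A minimal, self-contained sketch of that idea, using an invented edge-flag array (sizes and flags below are illustrative only), could look like this:

  program demo_inner_outer_perm

  implicit none

  integer, parameter :: nspec = 8
  ! made-up edge flags: .true. marks an element on an MPI slice edge ("outer")
  logical, dimension(nspec) :: is_on_a_slice_edge = &
    (/ .true., .false., .false., .true., .false., .true., .false., .false. /)
  integer, dimension(nspec) :: perm
  integer :: ispec,ispec_counter

  ispec_counter = 0
  perm(:) = 0

  ! pass 1: outer (edge) elements get the lowest new numbers,
  ! so their contributions can be computed and sent first
  do ispec = 1,nspec
    if( is_on_a_slice_edge(ispec) ) then
      ispec_counter = ispec_counter + 1
      perm(ispec) = ispec_counter
    endif
  enddo

  ! pass 2: inner elements fill the remaining slots and can be processed
  ! while the non-blocking messages are in flight
  do ispec = 1,nspec
    if( .not. is_on_a_slice_edge(ispec) ) then
      ispec_counter = ispec_counter + 1
      perm(ispec) = ispec_counter
    endif
  enddo

  print *,'number of outer elements = ',count(is_on_a_slice_edge)
  print *,'perm = ',perm

  end program demo_inner_outer_perm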

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_model.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_model.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/setup_model.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -68,7 +68,8 @@
     write(IMAIN,*) 'model setup successfully read in'
     write(IMAIN,*)
   endif
-
+  call sync_all()
+  
   end subroutine setup_model
 
 !

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/test_MPI_interfaces.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/test_MPI_interfaces.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/meshfem3D/test_MPI_interfaces.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,568 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+  subroutine test_MPI_neighbours(iregion_code, &
+                                     num_interfaces,max_nibool_interfaces, &
+                                     my_neighbours,nibool_interfaces, &
+                                     ibool_interfaces)
+
+  use constants
+  use meshfem3D_par,only: NPROCTOT,myrank
+  use create_MPI_interfaces_par,only: NGLOB_CRUST_MANTLE,NGLOB_OUTER_CORE,NGLOB_INNER_CORE  
+  implicit none
+
+  include 'mpif.h'
+
+  integer,intent(in) :: iregion_code
+  integer,intent(in) :: num_interfaces,max_nibool_interfaces
+  integer,dimension(num_interfaces),intent(in) :: my_neighbours,nibool_interfaces
+  integer,dimension(max_nibool_interfaces,num_interfaces),intent(in):: ibool_interfaces
+  
+  ! local parameters
+  integer,dimension(:),allocatable :: dummy_i
+  integer,dimension(:,:),allocatable :: test_interfaces
+  integer,dimension(:,:),allocatable :: test_interfaces_nibool
+  integer :: msg_status(MPI_STATUS_SIZE)
+  integer :: ineighbour,iproc,inum,i,j,ier,ipoints,max_num,iglob
+  logical :: is_okay
+  logical,dimension(:),allocatable :: mask
+  
+  ! daniel: debug output
+  !do iproc=0,NPROCTOT-1
+  !  if( myrank == iproc ) then    
+  !    print*, 'mpi rank',myrank,'interfaces : ',num_interfaces,'region',iregion_code
+  !    do j=1,num_interfaces
+  !      print*, '  my_neighbours: ',my_neighbours(j),nibool_interfaces(j)
+  !    enddo
+  !    print*
+  !  endif
+  !  call sync_all()
+  !enddo
+  
+  ! checks maximum number of interface points
+  if( max_nibool_interfaces == 0 .and. NPROCTOT > 1 ) then
+    print*,'test MPI: rank ',myrank,'max_nibool_interfaces is zero'
+    call exit_mpi(myrank,'error test max_nibool_interfaces zero')
+  endif
+  
+  ! allocates global mask
+  select case(iregion_code)
+  case( IREGION_CRUST_MANTLE )
+    allocate(mask(NGLOB_CRUST_MANTLE))
+  case( IREGION_OUTER_CORE )
+    allocate(mask(NGLOB_OUTER_CORE))
+  case( IREGION_INNER_CORE )
+    allocate(mask(NGLOB_INNER_CORE))
+  case default
+    call exit_mpi(myrank,'error test MPI: iregion_code not recognized')
+  end select
+  
+  ! test ibool entries
+  ! (must be non-zero and unique)
+  do i = 1,num_interfaces
+    ! number of interface points
+    if( nibool_interfaces(i) > max_nibool_interfaces ) then
+      print*,'error test MPI: rank',myrank,'nibool values:',nibool_interfaces(i),max_nibool_interfaces
+      call exit_mpi(myrank,'error test MPI: nibool exceeds max_nibool_interfaces')
+    endif
+    
+    mask(:) = .false.
+    
+    ! ibool entries  
+    do j = 1,nibool_interfaces(i)
+      iglob = ibool_interfaces(j,i)
+      
+      ! checks zero entry
+      if( iglob <= 0 ) then
+        print*,'error test MPI: rank ',myrank,'ibool value:',iglob,'interface:',i,'point:',j
+        call exit_mpi(myrank,'error test MPI: ibool values invalid')
+      endif
+      
+      ! checks duplicate
+      if( j < nibool_interfaces(i) ) then
+        if( iglob == ibool_interfaces(j+1,i) ) then
+          print*,'error test MPI: rank',myrank,'ibool duplicate:',iglob,'interface:',i,'point:',j
+          call exit_mpi(myrank,'error test MPI: ibool duplicates')
+        endif
+      endif
+      
+      ! checks if unique global value
+      if( .not. mask(iglob) ) then
+        mask(iglob) = .true.
+      else
+        print*,'error test MPI: rank',myrank,'ibool masked:',iglob,'interface:',i,'point:',j
+        call exit_mpi(myrank,'error test MPI: ibool masked already')
+      endif      
+    enddo
+  enddo
+  deallocate(mask)
+  
+  ! checks neighbors
+  ! gets maximum interfaces from all processes
+  call MPI_REDUCE(num_interfaces,max_num,1,MPI_INTEGER,MPI_MAX,0,MPI_COMM_WORLD,ier)
+
+  ! master gathers infos
+  if( myrank == 0 ) then
+    ! array for gathering infos
+    allocate(test_interfaces(max_num,0:NPROCTOT),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_interfaces')
+    test_interfaces = -1
+
+    allocate(test_interfaces_nibool(max_num,0:NPROCTOT),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_interfaces_nibool')
+    test_interfaces_nibool = 0
+
+    ! used to store number of interfaces per proc
+    allocate(dummy_i(0:NPROCTOT),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating dummy_i for test interfaces')
+    dummy_i = 0
+
+    ! sets infos for master process
+    test_interfaces(1:num_interfaces,0) = my_neighbours(1:num_interfaces)
+    test_interfaces_nibool(1:num_interfaces,0) = nibool_interfaces(1:num_interfaces)
+    dummy_i(0) = num_interfaces
+
+    ! collects from other processes
+    do iproc=1,NPROCTOT-1
+      ! gets number of interfaces
+      call MPI_RECV(inum,1,MPI_INTEGER,iproc,itag,MPI_COMM_WORLD,msg_status,ier)
+      dummy_i(iproc) = inum
+      if( inum > 0 ) then
+        call MPI_RECV(test_interfaces(1:inum,iproc),inum, &
+                      MPI_INTEGER,iproc,itag,MPI_COMM_WORLD,msg_status,ier)
+        call MPI_RECV(test_interfaces_nibool(1:inum,iproc),inum, &
+                      MPI_INTEGER,iproc,itag,MPI_COMM_WORLD,msg_status,ier)
+      endif
+    enddo
+  else
+    ! sends infos to master process
+    call MPI_SEND(num_interfaces,1,MPI_INTEGER,0,itag,MPI_COMM_WORLD,ier)
+    if( num_interfaces > 0 ) then
+      call MPI_SEND(my_neighbours(1:num_interfaces),num_interfaces, &
+                    MPI_INTEGER,0,itag,MPI_COMM_WORLD,ier)
+      call MPI_SEND(nibool_interfaces(1:num_interfaces),num_interfaces, &
+                    MPI_INTEGER,0,itag,MPI_COMM_WORLD,ier)
+    endif
+  endif
+  call sync_all()
+
+  ! checks if addressing is okay
+  if( myrank == 0 ) then
+    ! for each process
+    do iproc=0,NPROCTOT-1
+      ! loops over all neighbors
+      do i=1,dummy_i(iproc)
+        ! gets neighbour rank and number of points on interface with it
+        ineighbour = test_interfaces(i,iproc)
+        ipoints = test_interfaces_nibool(i,iproc)
+
+        ! checks values
+        if( ineighbour < 0 .or. ineighbour > NPROCTOT-1 ) then
+          print*,'error neighbour:',iproc,ineighbour
+          call exit_mpi(myrank,'error ineighbour')
+        endif
+        if( ipoints <= 0 ) then
+          print*,'error neighbour points:',iproc,ipoints
+          call exit_mpi(myrank,'error ineighbour points')
+        endif
+
+        ! looks up corresponding entry in neighbour array
+        is_okay = .false.
+        do j=1,dummy_i(ineighbour)
+          if( test_interfaces(j,ineighbour) == iproc ) then
+            ! checks if same number of interface points with this neighbour
+            if( test_interfaces_nibool(j,ineighbour) == ipoints ) then
+              is_okay = .true.
+            else
+              print*,'error ',iproc,'neighbour ',ineighbour,' points =',ipoints
+              print*,'  ineighbour has points = ',test_interfaces_nibool(j,ineighbour)
+              print*
+              call exit_mpi(myrank,'error ineighbour points differ')
+            endif
+            exit
+          endif
+        enddo
+        if( .not. is_okay ) then
+          print*,'error ',iproc,' neighbour not found: ',ineighbour
+          print*,'iproc ',iproc,' interfaces:'
+          print*,test_interfaces(1:dummy_i(iproc),iproc)
+          print*,'ineighbour ',ineighbour,' interfaces:'
+          print*,test_interfaces(1:dummy_i(ineighbour),ineighbour)
+          print*
+          call exit_mpi(myrank,'error ineighbour not found')
+        endif
+      enddo
+    enddo
+
+    ! user output
+    write(IMAIN,*) '  mpi addressing maximum interfaces:',maxval(dummy_i)
+    write(IMAIN,*) '  mpi addressing : all interfaces okay'
+    write(IMAIN,*)
+
+    deallocate(dummy_i)
+    deallocate(test_interfaces)
+    deallocate(test_interfaces_nibool)
+  endif
+  call sync_all()
+
+  end subroutine test_MPI_neighbours
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine test_MPI_cm()
+
+  use meshfem3D_par,only: NPROCTOT,myrank
+  use create_MPI_interfaces_par
+  implicit none
+
+  include 'mpif.h'
+
+  ! local parameters
+  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: test_flag_vector
+  integer :: i,j,iglob,ier
+  integer :: inum,icount,ival
+  integer :: num_unique,num_max_valence
+  integer,dimension(:),allocatable :: valence
+  
+  ! crust mantle
+  allocate(test_flag_vector(NDIM,NGLOB_CRUST_MANTLE),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array test_flag etc.'
+  allocate(valence(NGLOB_CRUST_MANTLE),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array valence'
+
+  ! points defined by interfaces
+  valence(:) = 0
+  test_flag_vector(:,:) = 0.0  
+  do i=1,num_interfaces_crust_mantle
+    do j=1,nibool_interfaces_crust_mantle(i)
+      iglob = ibool_interfaces_crust_mantle(j,i)      
+      ! sets flag on
+      test_flag_vector(1,iglob) = 1.0_CUSTOM_REAL
+      ! counts valence (occurrences)
+      valence(iglob) = valence(iglob) + 1
+    enddo
+  enddo
+  ! total number of  interface points
+  i = sum(nibool_interfaces_crust_mantle)  
+  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+  
+  ! total number of unique points (some could be shared between different processes)
+  i = nint( sum(test_flag_vector) )
+  num_unique= i
+  call MPI_REDUCE(i,icount,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+  
+  ! maximum valence
+  i = maxval( valence(:) )
+  num_max_valence = i
+  call MPI_REDUCE(i,ival,1,MPI_INTEGER,MPI_MAX,0,MPI_COMM_WORLD,ier)
+    
+  ! user output
+  if( myrank == 0 ) then
+    write(IMAIN,*) '  total MPI interface points : ',inum
+    write(IMAIN,*) '  unique MPI interface points: ',icount
+    write(IMAIN,*) '  maximum valence            : ',ival
+  endif
+
+  ! initializes for assembly
+  test_flag_vector(:,:) = 1.0_CUSTOM_REAL
+
+  ! adds contributions from different partitions to flag arrays
+  call assemble_MPI_vector_ext_mesh(NPROCTOT,NGLOB_CRUST_MANTLE, &
+                      test_flag_vector, &
+                      num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+                      nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,&
+                      my_neighbours_crust_mantle)
+
+  ! removes initial flag
+  test_flag_vector(:,:) = test_flag_vector(:,:) - 1.0_CUSTOM_REAL
+
+  ! checks number of interface points
+  i = 0
+  do iglob=1,NGLOB_CRUST_MANTLE
+    ! only counts flags with MPI contributions
+    if( test_flag_vector(1,iglob) > 0.0 ) i = i + 1
+    
+    ! checks valence
+    if( valence(iglob) /= nint(test_flag_vector(1,iglob)) .or. &
+       valence(iglob) /= nint(test_flag_vector(2,iglob)) .or. &
+       valence(iglob) /= nint(test_flag_vector(3,iglob)) ) then
+      print*,'error test MPI: rank',myrank,'valence:',valence(iglob),'flag:',test_flag_vector(:,iglob)
+      call exit_mpi(myrank,'error test MPI crust mantle valence')
+    endif
+  enddo
+
+  ! checks within slice
+  if( i /= num_unique ) then
+    print*,'error test crust mantle : rank',myrank,'unique mpi points:',i,num_unique
+    call exit_mpi(myrank,'error MPI assembly crust mantle')  
+  endif
+    
+  ! total number of assembly points
+  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+
+  ! points defined by interfaces
+  if( myrank == 0 ) then
+    ! checks
+    if( inum /= icount ) then
+      print*,'error crust mantle : total mpi points:',myrank,'total: ',inum,icount
+      call exit_mpi(myrank,'error MPI assembly crust mantle')
+    endif
+    
+    ! user output
+    write(IMAIN,*) '  total unique MPI interface points:',inum
+    write(IMAIN,*)    
+  endif
+
+  deallocate(test_flag_vector)
+  deallocate(valence)
+
+  call sync_all()
+
+  end subroutine test_MPI_cm
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine test_MPI_oc()
+
+  use meshfem3D_par,only: NPROCTOT,myrank
+  use create_MPI_interfaces_par
+  implicit none
+
+  include 'mpif.h'
+
+  ! local parameters
+  real(kind=CUSTOM_REAL),dimension(:),allocatable :: test_flag
+  integer :: i,j,iglob,ier
+  integer :: inum,icount,ival
+  integer :: num_max_valence,num_unique
+  integer,dimension(:),allocatable :: valence
+
+  ! outer core
+  allocate(test_flag(NGLOB_OUTER_CORE),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array test_flag etc.'
+  allocate(valence(NGLOB_OUTER_CORE),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array valence'
+
+  ! points defined by interfaces
+  valence(:) = 0
+  test_flag = 0.0
+  do i=1,num_interfaces_outer_core
+    do j=1,nibool_interfaces_outer_core(i)
+      iglob = ibool_interfaces_outer_core(j,i)
+      test_flag(iglob) = 1.0_CUSTOM_REAL
+      ! counts valence (occurrences)
+      valence(iglob) = valence(iglob) + 1
+    enddo
+  enddo
+  i = sum(nibool_interfaces_outer_core)
+  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+  
+  i = nint( sum(test_flag) )
+  num_unique = i
+  call MPI_REDUCE(i,icount,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+  
+  ! maximum valence
+  i = maxval( valence(:) )
+  num_max_valence = i
+  call MPI_REDUCE(i,ival,1,MPI_INTEGER,MPI_MAX,0,MPI_COMM_WORLD,ier)
+  
+  if( myrank == 0 ) then
+    write(IMAIN,*) '  total MPI interface points : ',inum
+    write(IMAIN,*) '  unique MPI interface points: ',icount
+    write(IMAIN,*) '  maximum valence            : ',ival    
+  endif
+
+  ! initializes for assembly
+  test_flag(:) = 1.0_CUSTOM_REAL
+
+  ! adds contributions from different partitions to flag arrays
+  call assemble_MPI_scalar_ext_mesh(NPROCTOT,NGLOB_OUTER_CORE, &
+                                test_flag, &
+                                num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+                                nibool_interfaces_outer_core,ibool_interfaces_outer_core,&
+                                my_neighbours_outer_core)
+
+
+  ! removes initial flag
+  test_flag(:) = test_flag(:) - 1.0_CUSTOM_REAL
+
+  ! checks number of interface points
+  i = 0
+  do iglob=1,NGLOB_OUTER_CORE
+    ! only counts flags with MPI contributions
+    if( test_flag(iglob) > 0.0 ) i = i + 1
+
+    ! checks valence
+    if( valence(iglob) /= nint(test_flag(iglob)) ) then
+      print*,'error test MPI: rank',myrank,'valence:',valence(iglob),'flag:',test_flag(iglob)
+      call exit_mpi(myrank,'error test outer core valence')
+    endif    
+  enddo
+
+  ! checks within slice
+  if( i /= num_unique ) then
+    print*,'error test outer core : rank',myrank,'unique mpi points:',i,num_unique
+    call exit_mpi(myrank,'error MPI assembly outer core')  
+  endif
+  
+  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+
+  ! output
+  if( myrank == 0 ) then
+    ! checks
+    if( inum /= icount ) then
+      print*,'error outer core : total mpi points:',myrank,'total: ',inum,icount
+      call exit_mpi(myrank,'error MPI assembly outer_core')
+    endif
+
+    ! user output
+    write(IMAIN,*) '  total assembled MPI interface points:',inum
+    write(IMAIN,*)    
+  endif
+
+  deallocate(test_flag)
+  deallocate(valence)
+
+  call sync_all()
+
+  end subroutine test_MPI_oc
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine test_MPI_ic()
+
+  use meshfem3D_par,only: NPROCTOT,myrank
+  use create_MPI_interfaces_par
+  implicit none
+
+  include 'mpif.h'
+
+  ! local parameters
+  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: test_flag_vector
+  integer :: i,j,iglob,ier
+  integer :: inum,icount,ival
+  integer :: num_unique,num_max_valence  
+  integer,dimension(:),allocatable :: valence
+
+  ! inner core
+  allocate(test_flag_vector(NDIM,NGLOB_INNER_CORE),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array test_flag etc.'
+  allocate(valence(NGLOB_INNER_CORE),stat=ier)
+  if( ier /= 0 ) stop 'error allocating array valence'
+
+  ! points defined by interfaces
+  valence(:) = 0  
+  test_flag_vector(:,:) = 0.0
+  do i=1,num_interfaces_inner_core
+    do j=1,nibool_interfaces_inner_core(i)
+      iglob = ibool_interfaces_inner_core(j,i)
+      ! sets flag on
+      test_flag_vector(1,iglob) = 1.0_CUSTOM_REAL
+      ! counts valence (occurrences)
+      valence(iglob) = valence(iglob) + 1
+    enddo
+  enddo
+  i = sum(nibool_interfaces_inner_core)
+  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+  
+  i = nint( sum(test_flag_vector) )
+  num_unique= i  
+  call MPI_REDUCE(i,icount,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+  
+  ! maximum valence
+  i = maxval( valence(:) )
+  num_max_valence = i
+  call MPI_REDUCE(i,ival,1,MPI_INTEGER,MPI_MAX,0,MPI_COMM_WORLD,ier)
+  
+  if( myrank == 0 ) then
+    write(IMAIN,*) '  total MPI interface points : ',inum
+    write(IMAIN,*) '  unique MPI interface points: ',icount
+    write(IMAIN,*) '  maximum valence            : ',ival    
+  endif
+
+  ! initializes for assembly
+  test_flag_vector = 1.0_CUSTOM_REAL
+
+  ! adds contributions from different partitions to flag arrays
+  call assemble_MPI_vector_ext_mesh(NPROCTOT,NGLOB_INNER_CORE, &
+                      test_flag_vector, &
+                      num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+                      nibool_interfaces_inner_core,ibool_interfaces_inner_core,&
+                      my_neighbours_inner_core)
+
+  ! removes initial flag
+  test_flag_vector(:,:) = test_flag_vector(:,:) - 1.0_CUSTOM_REAL
+
+  ! checks number of interface points
+  i = 0
+  do iglob=1,NGLOB_INNER_CORE
+    ! only counts flags with MPI contributions
+    if( test_flag_vector(1,iglob) > 0.0 ) i = i + 1
+
+    ! checks valence
+    if( valence(iglob) /= nint(test_flag_vector(1,iglob)) .or. &
+       valence(iglob) /= nint(test_flag_vector(2,iglob)) .or. &
+       valence(iglob) /= nint(test_flag_vector(3,iglob)) ) then
+      print*,'error test MPI: rank',myrank,'valence:',valence(iglob),'flag:',test_flag_vector(:,iglob)
+      call exit_mpi(myrank,'error test MPI inner core valence')
+    endif
+    
+  enddo
+
+  ! checks within slice
+  if( i /= num_unique ) then
+    print*,'error test inner core : rank',myrank,'unique mpi points:',i,num_unique
+    call exit_mpi(myrank,'error MPI assembly inner core')  
+  endif
+  
+  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+
+  if( myrank == 0 ) then
+    ! checks
+    if( inum /= icount ) then
+      print*,'error inner core : total mpi points:',myrank,'total: ',inum,icount
+      call exit_mpi(myrank,'error MPI assembly inner core')
+    endif
+
+    ! user output
+    write(IMAIN,*) '  total assembled MPI interface points:',inum
+    write(IMAIN,*)    
+  endif
+
+  deallocate(test_flag_vector)
+  deallocate(valence)
+
+  call sync_all()
+
+  end subroutine test_MPI_ic
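
The neighbour check in test_MPI_neighbours above reduces to a symmetry property of the gathered interface table: whenever rank p reports rank q as a neighbour with n shared points, rank q must report p with the same n. A small standalone sketch of that check on a hand-made table (the ranks and point counts are invented for illustration) is:

  program demo_interface_symmetry

  implicit none

  ! invented gathered table: rank iproc(k) reports npoints(k) shared points
  ! on its interface with rank ineigh(k)
  integer, parameter :: nentries = 4
  integer, dimension(nentries) :: iproc   = (/ 0, 1, 1, 2 /)
  integer, dimension(nentries) :: ineigh  = (/ 1, 0, 2, 1 /)
  integer, dimension(nentries) :: npoints = (/ 25, 25, 16, 16 /)
  integer :: k,m
  logical :: found

  ! every interface must be reported identically from both sides
  do k = 1,nentries
    found = .false.
    do m = 1,nentries
      if( iproc(m) == ineigh(k) .and. ineigh(m) == iproc(k) ) then
        if( npoints(m) /= npoints(k) ) stop 'error: point counts differ across an interface'
        found = .true.
        exit
      endif
    enddo
    if( .not. found ) stop 'error: interface reported by only one side'
  enddo

  print *,'all interfaces are symmetric'

  end program demo_interface_symmetry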

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/count_elements.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/count_elements.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/count_elements.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -48,7 +48,7 @@
   include "constants.h"
 
 
-! parameters to be computed based upon parameters above read from file
+  ! parameters to be computed based upon parameters above read from file
   integer NPROC,NEX_XI,NEX_ETA,NEX_PER_PROC_XI,NEX_PER_PROC_ETA,ratio_divide_central_cube
 
   integer, dimension(MAX_NUM_REGIONS) :: NSPEC,NSPEC2D_XI,NSPEC2D_ETA, &
@@ -66,7 +66,7 @@
               nb_lay_sb, nspec_sb, nglob_surf
 
 
-! for the cut doublingbrick improvement
+  ! for the cut doubling brick improvement
   logical :: CUT_SUPERBRICK_XI,CUT_SUPERBRICK_ETA
   logical :: INCLUDE_CENTRAL_CUBE
   integer :: last_doubling_layer

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/define_all_layers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/define_all_layers.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/define_all_layers.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -32,7 +32,8 @@
                         NER_TOP_CENTRAL_CUBE_ICB,&
                         RMIDDLE_CRUST,R220,R400,R600,R670,R771,RTOPDDOUBLEPRIME,RCMB,RICB, &
                         R_CENTRAL_CUBE,RMOHO_FICTITIOUS_IN_MESHER,R80_FICTITIOUS_IN_MESHER,&
-                        ONE_CRUST,ner,ratio_sampling_array,&
+                        ONE_CRUST, &
+                        ner,ratio_sampling_array,&
                         NUMBER_OF_MESH_LAYERS,layer_offset,last_doubling_layer, &
                         r_bottom,r_top,this_region_has_a_doubling,&
                         ielem,elem_doubling_mantle,elem_doubling_middle_outer_core,&
@@ -51,31 +52,33 @@
 
   include "constants.h"
 
-! parameters read from parameter file
+  ! parameters read from parameter file
   integer NER_CRUST,NER_80_MOHO,NER_220_80,NER_400_220,NER_600_400,NER_670_600,NER_771_670, &
           NER_TOPDDOUBLEPRIME_771,NER_CMB_TOPDDOUBLEPRIME,NER_OUTER_CORE, &
           NER_TOP_CENTRAL_CUBE_ICB
-  integer NUMBER_OF_MESH_LAYERS,layer_offset,last_doubling_layer
-
+  
+  ! radii
   double precision RMIDDLE_CRUST,R220,R400,R600,R670,R771,RTOPDDOUBLEPRIME,RCMB,RICB, &
           R_CENTRAL_CUBE,RMOHO_FICTITIOUS_IN_MESHER,R80_FICTITIOUS_IN_MESHER
 
   logical ONE_CRUST
 
+  ! layers
+  integer :: NUMBER_OF_MESH_LAYERS,layer_offset,last_doubling_layer
   integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: ner,ratio_sampling_array
+
   double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: r_bottom,r_top
   logical, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: this_region_has_a_doubling
 
+  ! doubling elements
   integer :: ielem,elem_doubling_mantle,elem_doubling_middle_outer_core,elem_doubling_bottom_outer_core
   double precision :: DEPTH_SECOND_DOUBLING_REAL,DEPTH_THIRD_DOUBLING_REAL, &
                           DEPTH_FOURTH_DOUBLING_REAL,distance,distance_min,zval
 
-
   integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: doubling_index
   double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: rmins,rmaxs
 
 
-
 ! find element below top of which we should implement the second doubling in the mantle
 ! locate element closest to optimal value
   distance_min = HUGEVAL

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/exit_mpi.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -32,12 +32,12 @@
 
   implicit none
 
-! standard include of the MPI library
+  ! standard include of the MPI library
   include 'mpif.h'
 
   include "constants.h"
 
-! identifier for error message file
+  ! identifier for error message file
   integer, parameter :: IERROR = 30
 
   integer :: myrank
@@ -47,11 +47,11 @@
   character(len=80) outputname
   character(len=150) OUTPUT_FILES
 
-! write error message to screen
+  ! write error message to screen
   write(*,*) error_msg(1:len(error_msg))
   write(*,*) 'Error detected, aborting MPI... proc ',myrank
 
-! write error message to file
+  ! write error message to file
   call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', 'OUTPUT_FILES')
   write(outputname,"('/error_message',i6.6,'.txt')") myrank
   open(unit=IERROR,file=trim(OUTPUT_FILES)//outputname,status='unknown')
@@ -59,17 +59,17 @@
   write(IERROR,*) 'Error detected, aborting MPI... proc ',myrank
   close(IERROR)
 
-! close output file
+  ! close output file
   if(myrank == 0 .and. IMAIN /= ISTANDARD_OUTPUT) close(IMAIN)
 
-! stop all the MPI processes, and exit
-! note: MPI_ABORT does not return, and does exit the
-!          program with an error code of 30
+  ! stop all the MPI processes, and exit
+  ! note: MPI_ABORT does not return, and does exit the
+  !          program with an error code of 30
   call MPI_ABORT(MPI_COMM_WORLD,30,ier)
 
-! otherwise: there is no standard behaviour to exit with an error code in fortran,
-! however most compilers do recognize this as an error code stop statement;
-! to check stop code in terminal: > echo $?
+  ! otherwise: there is no standard behaviour to exit with an error code in fortran,
+  ! however most compilers do recognize this as an error code stop statement;
+  ! to check stop code in terminal: > echo $?
   stop 30
 
   ! or just exit with message:
@@ -78,7 +78,7 @@
   end subroutine exit_MPI
 
 !
-!----
+!-------------------------------------------------------------------------------------------------
 !
 
 ! version without rank number printed in the error message
@@ -86,7 +86,7 @@
 
   implicit none
 
-! standard include of the MPI library
+  ! standard include of the MPI library
   include 'mpif.h'
 
   include "constants.h"
@@ -95,25 +95,25 @@
 
   integer :: ier
 
-! write error message to screen
+  ! write error message to screen
   write(*,*) error_msg(1:len(error_msg))
   write(*,*) 'Error detected, aborting MPI...'
 
-! stop all the MPI processes, and exit
+  ! stop all the MPI processes, and exit
   call MPI_ABORT(MPI_COMM_WORLD,30,ier)
   stop 'error, program ended in exit_MPI'
 
   end subroutine exit_MPI_without_rank
 
 !
-!----
+!-------------------------------------------------------------------------------------------------
 !
 
   subroutine sync_all()
 
   implicit none
 
-! standard include of the MPI library
+  ! standard include of the MPI library
   include 'mpif.h'
 
   integer :: ier,rank
@@ -126,3 +126,73 @@
   if( ier /= 0 ) call exit_mpi(rank,'error synchronize MPI processes')
 
   end subroutine sync_all
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine irecv_cr(recvbuf, recvcount, dest, recvtag, req)
+
+  implicit none
+
+  ! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+  include "precision.h"
+
+  integer recvcount, dest, recvtag, req
+  real(kind=CUSTOM_REAL), dimension(recvcount) :: recvbuf
+
+  integer ier
+
+  call MPI_IRECV(recvbuf(1),recvcount,CUSTOM_MPI_TYPE,dest,recvtag, &
+                  MPI_COMM_WORLD,req,ier)
+
+  end subroutine irecv_cr
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine isend_cr(sendbuf, sendcount, dest, sendtag, req)
+
+  implicit none
+
+  ! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+  include "precision.h"
+
+  integer sendcount, dest, sendtag, req
+  real(kind=CUSTOM_REAL), dimension(sendcount) :: sendbuf
+
+  integer ier
+
+  call MPI_ISEND(sendbuf(1),sendcount,CUSTOM_MPI_TYPE,dest,sendtag, &
+                  MPI_COMM_WORLD,req,ier)
+
+  end subroutine isend_cr
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine wait_req(req)
+
+  implicit none
+
+  ! standard include of the MPI library
+  include 'mpif.h'
+
+  integer :: req
+
+  integer, dimension(MPI_STATUS_SIZE) :: req_mpi_status
+
+  integer :: ier
+
+  call mpi_wait(req,req_mpi_status,ier)
+
+  end subroutine wait_req
+
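
The three wrappers added above are meant to be used as a post-receive / post-send / wait triple around the local work that hides the communication. A compile-only usage sketch (the routine name, buffers and message tag below are placeholders, not part of the library) might look like:

  subroutine demo_exchange(buffer_send,buffer_recv,npoints,ineighbour,msgtag)

! usage sketch for the non-blocking wrappers isend_cr / irecv_cr / wait_req

  implicit none

  include "constants.h"

  integer :: npoints,ineighbour,msgtag
  real(kind=CUSTOM_REAL), dimension(npoints) :: buffer_send,buffer_recv

  integer :: request_send,request_recv

  ! post the receive before the send
  call irecv_cr(buffer_recv,npoints,ineighbour,msgtag,request_recv)
  call isend_cr(buffer_send,npoints,ineighbour,msgtag,request_send)

  ! ... compute on inner elements here while the messages are in flight ...

  ! block until both requests complete before reusing the buffers
  call wait_req(request_recv)
  call wait_req(request_send)

  end subroutine demo_exchange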

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/get_timestep_and_layers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/get_timestep_and_layers.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/get_timestep_and_layers.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -393,7 +393,6 @@
 
   ! following models need special attention, at least for global simulations:
   if( NCHUNKS == 6 ) then
-
     ! makes time step smaller for this ref model, otherwise becomes unstable in fluid
     if (REFERENCE_1D_MODEL == REFERENCE_MODEL_IASP91) &
       DT = DT*(1.d0 - 0.3d0)
@@ -403,13 +402,6 @@
       ! DT = DT*(1.d0 - 0.1d0) not working yet...
       stop 'anisotropic inner core - unstable feature, uncomment this line in get_timestep_and_layers.f90'
     endif
-
-!daniel: debug
-    ! makes time step smaller for this ref model
-    if( NEX_MAX*multiplication_factor <= 98 ) then
-      if( THREE_D_MODEL == THREE_D_MODEL_S362ANI ) DT = DT*(1.d0 - 0.95d0)
-    endif
-
   endif
 
   ! following models need special attention, regardless of number of chunks:
@@ -422,9 +414,10 @@
     DT = DT*(1.d0 - 0.3d0)
 
   !  decreases time step as otherwise the solution might become unstable for rougher/unsmoothed models
-  !  if( THREE_D_MODEL == THREE_D_MODEL_PPM ) &
-  !    DT = DT * (1.d0 - 0.2d0)
-
+  if( .false. ) then
+    if( THREE_D_MODEL == THREE_D_MODEL_PPM ) DT = DT * (1.d0 - 0.2d0)
+  endif
+  
   ! takes a 5% safety margin on the maximum stable time step
   ! which was obtained by trial and error
   DT = DT * (1.d0 - 0.05d0)

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/model_topo_bathy.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/model_topo_bathy.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/model_topo_bathy.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -55,6 +55,11 @@
   integer :: ier
 
   if(myrank == 0) then
+    ! user output
+    write(IMAIN,*)
+    write(IMAIN,*) 'incorporating topography'
+    
+    ! read/save topo file on master
     call read_topo_bathy_file(ibathy_topo)
     call save_topo_bathy_database(ibathy_topo,LOCAL_PATH)
   endif
@@ -76,14 +81,17 @@
 
   include "constants.h"
 
-! use integer array to store values
+  ! use integer array to store values
   integer, dimension(NX_BATHY,NY_BATHY) :: ibathy_topo
 
   ! local parameters
   real :: val
+  integer :: ival
   integer :: itopo_x,itopo_y,ier
   character(len=150) :: topo_bathy_file
-
+  integer,parameter :: TOPO_MINIMUM = - 10000 ! (depth in m )
+  integer,parameter :: TOPO_MAXIMUM = + 10000 ! (height in m )
+  
   call get_value_string(topo_bathy_file, 'model.topoBathy.PATHNAME_TOPO_FILE', PATHNAME_TOPO_FILE)
 
   ! reads in topography values from file
@@ -97,7 +105,7 @@
   do itopo_y=1,NY_BATHY
     do itopo_x=1,NX_BATHY
       read(13,*,iostat=ier) val
-
+      
       ! checks
       if( ier /= 0 ) then
         print*,'error read topo_bathy: ix,iy = ',itopo_x,itopo_y,val
@@ -106,8 +114,18 @@
       endif
 
       ! converts to integer
-      ibathy_topo(itopo_x,itopo_y) = val
+      ival = nint(val)
 
+      ! checks values
+      if( ival < TOPO_MINIMUM .or. ival > TOPO_MAXIMUM ) then
+        print*,'error read topo_bathy: ival = ',ival,val,'ix,iy = ',itopo_x,itopo_y
+        print*,'topo_bathy dimension: nx,ny = ',NX_BATHY,NY_BATHY
+        call exit_mpi(0,'error reading topo_bathy file')
+      endif
+
+      ! stores in array
+      ibathy_topo(itopo_x,itopo_y) = ival
+
     enddo
   enddo
   close(13)
@@ -134,7 +152,7 @@
   endif
 
   ! user output
-  write(IMAIN,*) "topography/bathymetry: min/max = ",minval(ibathy_topo),maxval(ibathy_topo)
+  write(IMAIN,*) "  topography/bathymetry: min/max = ",minval(ibathy_topo),maxval(ibathy_topo)
 
   end subroutine read_topo_bathy_file
 
@@ -161,21 +179,20 @@
   ! only master needs to save this
   call create_name_database(prname,0,IREGION_CRUST_MANTLE,LOCAL_PATH)
 
-  ! saves topography and bathymetry file for solver
-
+  ! saves topography and bathymetry file for solver  
   open(unit=27,file=prname(1:len_trim(prname))//'topo.bin', &
         status='unknown',form='unformatted',action='write',iostat=ier)
   if( ier /= 0 ) then
+    ! inform about missing database topo file
     print*,'TOPOGRAPHY problem:'
-    print*,'error creating file: ',prname(1:len_trim(prname))//'topo.bin'
+    print*,'error opening file: ',prname(1:len_trim(prname))//'topo.bin'
     print*,'please check if path exists and rerun mesher'
     call exit_mpi(0,'error opening file for database topo')
   endif
-
+ 
   write(27) ibathy_topo
+  close(27)      
 
-  close(27)
-
   end subroutine save_topo_bathy_database
 
 !
@@ -204,15 +221,26 @@
   open(unit=27,file=prname(1:len_trim(prname))//'topo.bin', &
         status='unknown',form='unformatted',action='read',iostat=ier)
   if( ier /= 0 ) then
+    ! inform user
     print*,'TOPOGRAPHY problem:'
     print*,'error opening file: ',prname(1:len_trim(prname))//'topo.bin'
-    print*,'please check if file exists and rerun solver'
-    call exit_mpi(0,'error opening file for database topo')
-  endif
+    !print*,'please check if file exists and rerun solver'
+    !call exit_mpi(0,'error opening file for database topo')
+        
+    ! read by original file
+    print*,'trying original topography file...'
+    call read_topo_bathy_file(ibathy_topo)
 
-  read(27) ibathy_topo
+    ! saves database topo file for next time
+    call save_topo_bathy_database(ibathy_topo,LOCAL_PATH)    
+  else
+    ! database topo file exists
+    read(27) ibathy_topo
+    close(27)  
 
-  close(27)
+    ! user output
+    write(IMAIN,*) "  topography/bathymetry: min/max = ",minval(ibathy_topo),maxval(ibathy_topo)
+  endif
 
   end subroutine read_topo_bathy_database
 
@@ -230,23 +258,30 @@
 
   include "constants.h"
 
-! use integer array to store values
-  integer, dimension(NX_BATHY,NY_BATHY) :: ibathy_topo
+  ! use integer array to store values
+  integer, dimension(NX_BATHY,NY_BATHY),intent(in) :: ibathy_topo
 
-  double precision xlat,xlon,value
+  ! location latitude/longitude (in degree)
+  double precision,intent(in):: xlat,xlon
+  
+  ! returns elevation (in meters)
+  double precision,intent(out):: value
 
-  integer iadd1,iel1
-  double precision samples_per_degree_topo
-  double precision xlo
+  ! local parameters
+  integer:: iadd1,iel1
+  double precision:: samples_per_degree_topo
+  double precision:: xlo
   double precision:: lon_corner,lat_corner,ratio_lon,ratio_lat
 
+  ! longitude within range [0,360] degrees
   xlo = xlon
-  if(xlon < 0.d0) xlo = xlo + 360.d0
+  if(xlo < 0.d0) xlo = xlo + 360.d0
+  if(xlo > 360.d0) xlo = xlo - 360.d0
 
-! compute number of samples per degree
+  ! compute number of samples per degree
   samples_per_degree_topo = dble(RESOLUTION_TOPO_FILE) / 60.d0
 
-! compute offset in data file and avoid edge effects
+  ! compute offset in data file and avoid edge effects
   iadd1 = 1 + int((90.d0-xlat)/samples_per_degree_topo)
   if(iadd1 < 1) iadd1 = 1
   if(iadd1 > NY_BATHY) iadd1 = NY_BATHY
@@ -255,7 +290,8 @@
   if(iel1 <= 0 .or. iel1 > NX_BATHY) iel1 = NX_BATHY
 
 ! Use bilinear interpolation rather nearest point interpolation
-! convert integer value to double precision
+
+  ! convert integer value to double precision
   !  value = dble(ibathy_topo(iel1,iadd1))
 
   lon_corner=iel1*samples_per_degree_topo
@@ -269,7 +305,7 @@
   if(ratio_lat<0.0) ratio_lat=0.0
   if(ratio_lat>1.0) ratio_lat=1.0
 
-! convert integer value to double precision
+  ! convert integer value to double precision
   if( iadd1 <= NY_BATHY-1 .and. iel1 <= NX_BATHY-1 ) then
     ! interpolates for points within boundaries
     value = dble(ibathy_topo(iel1,iadd1))*(1-ratio_lon)*(1.-ratio_lat) &

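The interpolation at the end of get_topo_bathy above weights the four surrounding grid nodes by the fractional position (ratio_lon, ratio_lat) of the target point inside the cell. A self-contained numerical sketch of that bilinear formula, with made-up node elevations, is:

  program demo_bilinear_topo

  implicit none

  double precision :: v00,v10,v01,v11
  double precision :: ratio_lon,ratio_lat,value

  ! made-up elevations (in m) at the four surrounding grid nodes
  v00 = 100.d0   ! node (iel1  ,iadd1  )
  v10 = 140.d0   ! node (iel1+1,iadd1  )
  v01 = 120.d0   ! node (iel1  ,iadd1+1)
  v11 = 180.d0   ! node (iel1+1,iadd1+1)

  ! fractional position of the target point inside the cell, clamped to [0,1]
  ratio_lon = 0.25d0
  ratio_lat = 0.75d0

  ! each node is weighted by the area of the opposite sub-rectangle;
  ! the four weights sum to one
  value = v00*(1.d0-ratio_lon)*(1.d0-ratio_lat) &
        + v10*ratio_lon*(1.d0-ratio_lat) &
        + v01*(1.d0-ratio_lon)*ratio_lat &
        + v11*ratio_lon*ratio_lat

  print *,'interpolated elevation = ',value

  end program demo_bilinear_topo
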
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_compute_parameters.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_compute_parameters.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_compute_parameters.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -433,11 +433,13 @@
     stop 'need six chunks to include central cube'
 
   ! check that sphere can be cut into slices without getting negative Jacobian
-  if(NEX_XI < 48) &
-    stop 'NEX_XI must be greater than 48 to cut the sphere into slices with positive Jacobian'
-  if(NEX_ETA < 48) &
-    stop 'NEX_ETA must be greater than 48 to cut the sphere into slices with positive Jacobian'
-
+  if(NCHUNKS == 6 ) then
+    if(NEX_XI < 48) &
+      stop 'NEX_XI must be greater than 48 to cut the sphere into slices with positive Jacobian'
+    if(NEX_ETA < 48) &
+      stop 'NEX_ETA must be greater than 48 to cut the sphere into slices with positive Jacobian'
+  endif
+  
   ! check that topology is correct if more than two chunks
   if(NCHUNKS > 2 .and. NEX_XI /= NEX_ETA) &
     stop 'must have NEX_XI = NEX_ETA for more than two chunks'

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_parameter_file.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -163,8 +163,6 @@
   if(err_occurred() /= 0) stop 'an error occurred while reading the parameter file: NUMBER_OF_THIS_RUN'
   call read_value_string(LOCAL_PATH, 'LOCAL_PATH')
   if(err_occurred() /= 0) stop 'an error occurred while reading the parameter file: LOCAL_PATH'
-  call read_value_string(LOCAL_TMP_PATH, 'LOCAL_TMP_PATH')
-  if(err_occurred() /= 0) stop 'an error occurred while reading the parameter file: LOCAL_TMP_PATH'
   call read_value_integer(NTSTEP_BETWEEN_OUTPUT_INFO, 'solver.NTSTEP_BETWEEN_OUTPUT_INFO')
   if(err_occurred() /= 0) stop 'an error occurred while reading the parameter file: NTSTEP_BETWEEN_OUTPUT_INFO'
   call read_value_integer(NTSTEP_BETWEEN_OUTPUT_SEISMOS, 'solver.NTSTEP_BETWEEN_OUTPUT_SEISMOS')
@@ -193,6 +191,20 @@
   ! closes parameter file
   call close_parameter_file()
 
+  ! optional parameters:
+
+  ! initializes
+  LOCAL_TMP_PATH = LOCAL_PATH
+  
+  ! opens file Par_file
+  call open_parameter_file()
+
+  call read_value_string(LOCAL_TMP_PATH, 'LOCAL_TMP_PATH')
+  call read_value_clear_err()
+  
+  ! close parameter file
+  call close_parameter_file()  
+
   end subroutine read_parameter_file
 
 !

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_value_parameters.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_value_parameters.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/read_value_parameters.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -132,6 +132,17 @@
 
 !--------------------
 
+  subroutine read_value_clear_err()
+
+  integer ierr
+  common /param_err_common/ ierr
+
+  ierr = 0
+
+  end subroutine read_value_clear_err
+
+!--------------------
+
 !
 ! unused routines:
 !

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/write_VTK_file.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/write_VTK_file.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/shared/write_VTK_file.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -0,0 +1,608 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+  subroutine write_VTK_data_points(nglob, &
+                                  xstore_dummy,ystore_dummy,zstore_dummy, &
+                                  points_globalindices,num_points_globalindices, &
+                                  prname_file)
+
+! external mesh routine for saving vtk files for points locations
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nglob
+
+  ! global coordinates
+  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+  ! gll data values array
+  integer :: num_points_globalindices
+  integer, dimension(num_points_globalindices) :: points_globalindices
+
+  ! file name
+  character(len=150) prname_file
+
+  integer :: i,iglob
+
+  ! write source and receiver VTK files for Paraview
+  !debug
+  !write(IMAIN,*) '  vtk file: '
+  !write(IMAIN,*) '    ',prname_file(1:len_trim(prname_file))//'.vtk'
+
+  open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
+  write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+  write(IOVTK,'(a)') 'material model VTK file'
+  write(IOVTK,'(a)') 'ASCII'
+  write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+  write(IOVTK, '(a,i12,a)') 'POINTS ', num_points_globalindices, ' float'
+  do i=1,num_points_globalindices
+    iglob = points_globalindices(i)
+    if( iglob <= 0 .or. iglob > nglob ) then
+      print*,'error: '//prname_file(1:len_trim(prname_file))//'.vtk'
+      print*,'error global index: ',iglob,i
+      close(IOVTK)
+      stop 'error vtk points file'
+    endif
+
+    write(IOVTK,'(3e18.6)') xstore_dummy(iglob),ystore_dummy(iglob),zstore_dummy(iglob)
+  enddo
+  write(IOVTK,*) ""
+
+  close(IOVTK)
+
+  end subroutine write_VTK_data_points
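
All routines in this new file emit the same legacy ASCII VTK layout (header, POINTS, CELLS with zero-based point indices, CELL_TYPES 12 for hexahedra, then point or cell data). A tiny standalone program that writes that layout for a single unit-cube element (file name and values are illustrative only) is:

  program demo_minimal_vtk

  implicit none

  integer, parameter :: IOUT = 27
  integer :: i
  double precision, dimension(3,8) :: xyz

  ! corners of a unit cube, in the point order expected by VTK_HEXAHEDRON
  xyz = reshape( (/ 0.d0,0.d0,0.d0,  1.d0,0.d0,0.d0,  1.d0,1.d0,0.d0,  0.d0,1.d0,0.d0, &
                    0.d0,0.d0,1.d0,  1.d0,0.d0,1.d0,  1.d0,1.d0,1.d0,  0.d0,1.d0,1.d0 /), (/3,8/) )

  open(unit=IOUT,file='single_element.vtk',status='unknown')
  write(IOUT,'(a)') '# vtk DataFile Version 3.1'
  write(IOUT,'(a)') 'material model VTK file'
  write(IOUT,'(a)') 'ASCII'
  write(IOUT,'(a)') 'DATASET UNSTRUCTURED_GRID'

  ! the 8 corner points
  write(IOUT,'(a,i12,a)') 'POINTS ', 8, ' float'
  do i = 1,8
    write(IOUT,'(3e18.6)') xyz(1,i),xyz(2,i),xyz(3,i)
  enddo
  write(IOUT,*) ""

  ! one cell referencing the 8 points; VTK indices start at 0
  write(IOUT,'(a,i12,i12)') 'CELLS ', 1, 9
  write(IOUT,'(9i12)') 8, (i-1, i=1,8)
  write(IOUT,*) ""

  ! cell type 12 = VTK_HEXAHEDRON
  write(IOUT,'(a,i12)') 'CELL_TYPES ', 1
  write(IOUT,*) 12
  write(IOUT,*) ""

  ! one scalar value attached to the cell
  write(IOUT,'(a,i12)') 'CELL_DATA ', 1
  write(IOUT,'(a)') 'SCALARS elem_flag integer'
  write(IOUT,'(a)') 'LOOKUP_TABLE default'
  write(IOUT,*) 1
  close(IOUT)

  end program demo_minimal_vtk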
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine write_VTK_glob_points(nglob, &
+                                  xstore_dummy,ystore_dummy,zstore_dummy, &
+                                  glob_values, &
+                                  prname_file)
+
+! external mesh routine for saving vtk files for points locations
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nglob
+
+  ! global coordinates
+  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+  ! gll data values array
+  real(kind=CUSTOM_REAL), dimension(nglob) :: glob_values
+
+  ! file name
+  character(len=150) prname_file
+
+  integer :: iglob
+
+  ! write source and receiver VTK files for Paraview
+  !debug
+  !write(IMAIN,*) '  vtk file: '
+  !write(IMAIN,*) '    ',prname_file(1:len_trim(prname_file))//'.vtk'
+
+  open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
+  write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+  write(IOVTK,'(a)') 'material model VTK file'
+  write(IOVTK,'(a)') 'ASCII'
+  write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+  write(IOVTK, '(a,i12,a)') 'POINTS ', nglob, ' float'
+  do iglob=1,nglob
+    write(IOVTK,*) xstore_dummy(iglob),ystore_dummy(iglob),zstore_dummy(iglob)
+  enddo
+  write(IOVTK,*) ""
+
+  ! writes out gll-data (velocity) for each element point
+  write(IOVTK,'(a,i12)') "POINT_DATA ",nglob
+  write(IOVTK,'(a)') "SCALARS glob_data float"
+  write(IOVTK,'(a)') "LOOKUP_TABLE default"
+  do iglob=1,nglob
+    write(IOVTK,*) glob_values(iglob)
+  enddo
+  write(IOVTK,*) ""
+
+  close(IOVTK)
+
+  end subroutine write_VTK_glob_points
+
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine write_VTK_data_elem_l(nspec,nglob, &
+                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+                        elem_flag,prname_file)
+
+! routine for saving vtk file holding logical flag on each spectral element
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob
+
+  ! global coordinates
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+  ! element flag array
+  logical, dimension(nspec) :: elem_flag
+  integer :: ispec,i
+
+  ! file name
+  character(len=150) prname_file
+
+  ! write source and receiver VTK files for Paraview
+  !debug
+  !write(IMAIN,*) '  vtk file: '
+  !write(IMAIN,*) '    ',prname_file(1:len_trim(prname_file))//'.vtk'
+
+  open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
+  write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+  write(IOVTK,'(a)') 'material model VTK file'
+  write(IOVTK,'(a)') 'ASCII'
+  write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+  write(IOVTK, '(a,i12,a)') 'POINTS ', nglob, ' float'
+  do i=1,nglob
+    write(IOVTK,'(3e18.6)') xstore_dummy(i),ystore_dummy(i),zstore_dummy(i)
+  enddo
+  write(IOVTK,*) ""
+
+  ! note: indices for vtk start at 0
+  write(IOVTK,'(a,i12,i12)') "CELLS ",nspec,nspec*9
+  do ispec=1,nspec
+    write(IOVTK,'(9i12)') 8,ibool(1,1,1,ispec)-1,ibool(NGLLX,1,1,ispec)-1,ibool(NGLLX,NGLLY,1,ispec)-1,ibool(1,NGLLY,1,ispec)-1,&
+          ibool(1,1,NGLLZ,ispec)-1,ibool(NGLLX,1,NGLLZ,ispec)-1,ibool(NGLLX,NGLLY,NGLLZ,ispec)-1,ibool(1,NGLLY,NGLLZ,ispec)-1
+  enddo
+  write(IOVTK,*) ""
+
+  ! type: hexahedrons
+  write(IOVTK,'(a,i12)') "CELL_TYPES ",nspec
+  write(IOVTK,*) (12,ispec=1,nspec)
+  write(IOVTK,*) ""
+
+  write(IOVTK,'(a,i12)') "CELL_DATA ",nspec
+  write(IOVTK,'(a)') "SCALARS elem_flag integer"
+  write(IOVTK,'(a)') "LOOKUP_TABLE default"
+  do ispec = 1,nspec
+    if( elem_flag(ispec) .eqv. .true. ) then
+      write(IOVTK,*) 1
+    else
+      write(IOVTK,*) 0
+    endif
+  enddo
+  write(IOVTK,*) ""
+  close(IOVTK)
+
+
+  end subroutine write_VTK_data_elem_l
+
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine write_VTK_data_elem_i(nspec,nglob, &
+                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+                        elem_flag,prname_file)
+
+
+! routine for saving vtk file holding integer value on each spectral element
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob
+
+  ! global coordinates
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+  ! element flag array
+  integer, dimension(nspec) :: elem_flag
+  integer :: ispec,i
+
+  ! file name
+  character(len=150) prname_file
+
+  ! write source and receiver VTK files for Paraview
+  !debug
+  !write(IMAIN,*) '  vtk file: '
+  !write(IMAIN,*) '    ',prname_file(1:len_trim(prname_file))//'.vtk'
+
+  open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
+  write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+  write(IOVTK,'(a)') 'material model VTK file'
+  write(IOVTK,'(a)') 'ASCII'
+  write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+  write(IOVTK, '(a,i12,a)') 'POINTS ', nglob, ' float'
+  do i=1,nglob
+    write(IOVTK,'(3e18.6)') xstore_dummy(i),ystore_dummy(i),zstore_dummy(i)
+  enddo
+  write(IOVTK,*) ""
+
+  ! note: indices for vtk start at 0
+  write(IOVTK,'(a,i12,i12)') "CELLS ",nspec,nspec*9
+  do ispec=1,nspec
+    write(IOVTK,'(9i12)') 8,ibool(1,1,1,ispec)-1,ibool(NGLLX,1,1,ispec)-1,ibool(NGLLX,NGLLY,1,ispec)-1,ibool(1,NGLLY,1,ispec)-1,&
+          ibool(1,1,NGLLZ,ispec)-1,ibool(NGLLX,1,NGLLZ,ispec)-1,ibool(NGLLX,NGLLY,NGLLZ,ispec)-1,ibool(1,NGLLY,NGLLZ,ispec)-1
+  enddo
+  write(IOVTK,*) ""
+
+  ! type: hexahedrons
+  write(IOVTK,'(a,i12)') "CELL_TYPES ",nspec
+  write(IOVTK,*) (12,ispec=1,nspec)
+  write(IOVTK,*) ""
+
+  write(IOVTK,'(a,i12)') "CELL_DATA ",nspec
+  write(IOVTK,'(a)') "SCALARS elem_val integer"
+  write(IOVTK,'(a)') "LOOKUP_TABLE default"
+  do ispec = 1,nspec
+    write(IOVTK,*) elem_flag(ispec)
+  enddo
+  write(IOVTK,*) ""
+  close(IOVTK)
+
+  end subroutine write_VTK_data_elem_i
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! external mesh routine for saving vtk files for custom_real values on global points
+
+  subroutine write_VTK_data_cr(idoubling,nspec,nglob, &
+                              xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+                              glob_data,prname_file)
+
+! outputs single file for each process
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob
+
+  integer, dimension(nspec):: idoubling
+
+  ! global coordinates
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+  ! global data values array
+  real(kind=CUSTOM_REAL), dimension(NDIM,nglob) :: glob_data
+
+  ! file name
+  character(len=256) prname_file
+
+  ! local parameters
+  integer :: ispec,i
+  real(kind=CUSTOM_REAL) :: rval,thetaval,phival,xval,yval,zval
+
+  ! write source and receiver VTK files for Paraview
+  open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
+  write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+  write(IOVTK,'(a)') 'material model VTK file'
+  write(IOVTK,'(a)') 'ASCII'
+  write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+  write(IOVTK, '(a,i12,a)') 'POINTS ', nglob, ' float'
+  do i=1,nglob
+
+    ! x,y,z stores have already been converted to r,theta,phi; convert back to x,y,z for output
+    rval = xstore_dummy(i)
+    thetaval = ystore_dummy(i)
+    phival = zstore_dummy(i)
+    call rthetaphi_2_xyz(xval,yval,zval,rval,thetaval,phival)
+
+    !write(IOVTK,'(3e18.6)') xstore_dummy(i),ystore_dummy(i),zstore_dummy(i)
+    write(IOVTK,'(3e18.6)') xval,yval,zval
+  enddo
+  write(IOVTK,*) ""
+
+  ! defines cell on coarse corner points
+  ! note: indices for vtk start at 0
+  write(IOVTK,'(a,i12,i12)') "CELLS ",nspec,nspec*9
+  do ispec=1,nspec
+
+    ! specific to inner core elements
+    ! exclude fictitious elements in central cube
+    if(idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then
+      ! valid cell
+      write(IOVTK,'(9i12)') 8,ibool(1,1,1,ispec)-1, &
+                          ibool(NGLLX,1,1,ispec)-1, &
+                          ibool(NGLLX,NGLLY,1,ispec)-1, &
+                          ibool(1,NGLLY,1,ispec)-1, &
+                          ibool(1,1,NGLLZ,ispec)-1, &
+                          ibool(NGLLX,1,NGLLZ,ispec)-1, &
+                          ibool(NGLLX,NGLLY,NGLLZ,ispec)-1, &
+                          ibool(1,NGLLY,NGLLZ,ispec)-1
+    else
+      ! fictitious elements in central cube
+      ! collapses the cell onto a single, arbitrarily chosen point
+      write(IOVTK,'(9i12)') 8,ibool(1,1,1,1)-1, &
+                            ibool(1,1,1,1)-1, &
+                            ibool(1,1,1,1)-1, &
+                            ibool(1,1,1,1)-1, &
+                            ibool(1,1,1,1)-1, &
+                            ibool(1,1,1,1)-1, &
+                            ibool(1,1,1,1)-1, &
+                            ibool(1,1,1,1)-1
+    endif
+
+  enddo
+  write(IOVTK,*) ""
+
+  ! type: hexahedrons
+  write(IOVTK,'(a,i12)') "CELL_TYPES ",nspec
+  write(IOVTK,*) (12,ispec=1,nspec)
+  write(IOVTK,*) ""
+
+  ! x components
+  write(IOVTK,'(a,i12)') "POINT_DATA ",nglob
+  write(IOVTK,'(a)') "SCALARS x_comp float"
+  write(IOVTK,'(a)') "LOOKUP_TABLE default"
+  do i = 1,nglob
+      write(IOVTK,*) glob_data(1,i)
+  enddo
+  ! y components
+  write(IOVTK,'(a)') "SCALARS y_comp float"
+  write(IOVTK,'(a)') "LOOKUP_TABLE default"
+  do i = 1,nglob
+      write(IOVTK,*) glob_data(2,i)
+  enddo
+  ! z components
+  write(IOVTK,'(a)') "SCALARS z_comp float"
+  write(IOVTK,'(a)') "LOOKUP_TABLE default"
+  do i = 1,nglob
+      write(IOVTK,*) glob_data(3,i)
+  enddo
+  ! norm
+  write(IOVTK,'(a)') "SCALARS norm float"
+  write(IOVTK,'(a)') "LOOKUP_TABLE default"
+  do i = 1,nglob
+      write(IOVTK,*) sqrt( glob_data(1,i)*glob_data(1,i) &
+                        + glob_data(2,i)*glob_data(2,i) &
+                        + glob_data(3,i)*glob_data(3,i))
+  enddo
+  write(IOVTK,*) ""
+
+  close(IOVTK)
+
+
+  end subroutine write_VTK_data_cr
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+! external mesh routine for saving vtk files for custom_real values on global points
+
+  subroutine write_VTK_data_cr_all(myrank,NPROCTOT,idoubling, &
+                              nspec,nglob, &
+                              xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
+                              glob_data,prname_file)
+
+! outputs single file for all processes
+
+  implicit none
+
+  include "constants.h"
+
+  include 'mpif.h'
+  include "precision.h"
+
+  integer :: myrank,NPROCTOT
+
+  integer :: nspec,nglob
+
+  integer, dimension(nspec):: idoubling
+
+  ! global coordinates
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
+
+  ! global data values array
+  real(kind=CUSTOM_REAL), dimension(NDIM,nglob) :: glob_data
+
+  ! file name
+  character(len=256) prname_file
+
+  ! local parameters
+  integer :: ispec,i,iproc,ier
+  real(kind=CUSTOM_REAL) :: rval,thetaval,phival,xval,yval,zval
+
+  real(kind=CUSTOM_REAL), dimension(:,:),allocatable :: &
+      store_val_x_all,store_val_y_all,store_val_z_all, &
+      store_val_ux_all,store_val_uy_all,store_val_uz_all
+  integer, dimension(:,:,:,:,:),allocatable :: ibool_all
+  integer, dimension(:,:),allocatable :: idoubling_all
+
+  ! master process collects arrays
+  if( myrank == 0 ) then
+    allocate(store_val_x_all(nglob,0:NPROCTOT-1), &
+            store_val_y_all(nglob,0:NPROCTOT-1), &
+            store_val_z_all(nglob,0:NPROCTOT-1), &
+            store_val_ux_all(nglob,0:NPROCTOT-1), &
+            store_val_uy_all(nglob,0:NPROCTOT-1), &
+            store_val_uz_all(nglob,0:NPROCTOT-1), &
+            idoubling_all(nspec,0:NPROCTOT-1), &
+            ibool_all(NGLLX,NGLLY,NGLLZ,nspec,0:NPROCTOT-1),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating stores')
+  else
+    ! dummy arrays
+    allocate(store_val_x_all(1,1), &
+            store_val_y_all(1,1), &
+            store_val_z_all(1,1), &
+            store_val_ux_all(1,1), &
+            store_val_uy_all(1,1), &
+            store_val_uz_all(1,1), &
+            idoubling_all(1,1), &
+            ibool_all(1,1,1,1,1),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating dummy stores')
+  endif
+
+  ! gather info on master proc
+  call MPI_GATHER(xstore_dummy,nglob,CUSTOM_MPI_TYPE,store_val_x_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
+  call MPI_GATHER(ystore_dummy,nglob,CUSTOM_MPI_TYPE,store_val_y_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
+  call MPI_GATHER(zstore_dummy,nglob,CUSTOM_MPI_TYPE,store_val_z_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
+
+  call MPI_GATHER(glob_data(1,:),nglob,CUSTOM_MPI_TYPE,store_val_ux_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
+  call MPI_GATHER(glob_data(2,:),nglob,CUSTOM_MPI_TYPE,store_val_uy_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
+  call MPI_GATHER(glob_data(3,:),nglob,CUSTOM_MPI_TYPE,store_val_uz_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
+
+  call MPI_GATHER(ibool,NGLLX*NGLLY*NGLLZ*nspec,MPI_INTEGER,ibool_all, &
+                  NGLLX*NGLLY*NGLLZ*nspec,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+  call MPI_GATHER(idoubling,nspec,MPI_INTEGER,idoubling_all,nspec,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+
+
+  if( myrank == 0 ) then
+
+    ! write source and receiver VTK files for Paraview
+    open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
+    write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
+    write(IOVTK,'(a)') 'material model VTK file'
+    write(IOVTK,'(a)') 'ASCII'
+    write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
+    write(IOVTK, '(a,i12,a)') 'POINTS ', nglob*NPROCTOT, ' float'
+    do iproc=0, NPROCTOT-1
+      do i=1,nglob
+
+        ! x,y,z stores have already been converted to r,theta,phi; convert back to x,y,z for output
+        rval = store_val_x_all(i,iproc)
+        thetaval = store_val_y_all(i,iproc)
+        phival = store_val_z_all(i,iproc)
+        call rthetaphi_2_xyz(xval,yval,zval,rval,thetaval,phival)
+
+        !write(IOVTK,'(3e18.6)') xstore_dummy(i),ystore_dummy(i),zstore_dummy(i)
+        write(IOVTK,'(3e18.6)') xval,yval,zval
+      enddo
+    enddo
+    write(IOVTK,*) ""
+
+    ! defines cell on coarse corner points
+    ! note: indices for vtk start at 0
+    write(IOVTK,'(a,i12,i12)') "CELLS ",nspec*NPROCTOT,nspec*NPROCTOT*9
+    do iproc=0, NPROCTOT-1
+      do ispec=1,nspec
+
+        ! note: central cube elements are only shared and used in CHUNK_AB and CHUNK_AB_ANTIPODE
+        !          all other chunks ignore those elements
+
+        ! specific to inner core elements
+        ! exclude fictitious elements in central cube
+        if(idoubling_all(ispec,iproc) /= IFLAG_IN_FICTITIOUS_CUBE) then
+          ! valid cell
+          ! cell corner ids
+          write(IOVTK,'(9i12)') 8,ibool_all(1,1,1,ispec,iproc)-1+iproc*nglob, &
+                            ibool_all(NGLLX,1,1,ispec,iproc)-1+iproc*nglob, &
+                            ibool_all(NGLLX,NGLLY,1,ispec,iproc)-1+iproc*nglob, &
+                            ibool_all(1,NGLLY,1,ispec,iproc)-1+iproc*nglob, &
+                            ibool_all(1,1,NGLLZ,ispec,iproc)-1+iproc*nglob, &
+                            ibool_all(NGLLX,1,NGLLZ,ispec,iproc)-1+iproc*nglob, &
+                            ibool_all(NGLLX,NGLLY,NGLLZ,ispec,iproc)-1+iproc*nglob, &
+                            ibool_all(1,NGLLY,NGLLZ,ispec,iproc)-1+iproc*nglob
+        else
+          ! fictitious elements in central cube
+          ! collapses the cell onto a single, arbitrarily chosen point
+          write(IOVTK,'(9i12)') 8,ibool_all(1,1,1,1,iproc)-1, &
+                            ibool_all(1,1,1,1,iproc)-1, &
+                            ibool_all(1,1,1,1,iproc)-1, &
+                            ibool_all(1,1,1,1,iproc)-1, &
+                            ibool_all(1,1,1,1,iproc)-1, &
+                            ibool_all(1,1,1,1,iproc)-1, &
+                            ibool_all(1,1,1,1,iproc)-1, &
+                            ibool_all(1,1,1,1,iproc)-1
+        endif
+
+      enddo
+    enddo
+    write(IOVTK,*) ""
+
+    ! type: hexahedrons
+    write(IOVTK,'(a,i12)') "CELL_TYPES ",nspec*NPROCTOT
+    write(IOVTK,*) (12,ispec=1,nspec*NPROCTOT)
+    write(IOVTK,*) ""
+
+    ! x components
+    write(IOVTK,'(a,i12)') "POINT_DATA ",nglob*NPROCTOT
+    write(IOVTK,'(a)') "SCALARS x_comp float"
+    write(IOVTK,'(a)') "LOOKUP_TABLE default"
+    do iproc=0, NPROCTOT-1
+      do i = 1,nglob
+        write(IOVTK,*) store_val_ux_all(i,iproc)
+      enddo
+    enddo
+    ! y components
+    write(IOVTK,'(a)') "SCALARS y_comp float"
+    write(IOVTK,'(a)') "LOOKUP_TABLE default"
+    do iproc=0, NPROCTOT-1
+      do i = 1,nglob
+        write(IOVTK,*) store_val_uy_all(i,iproc)
+      enddo
+    enddo
+    ! z components
+    write(IOVTK,'(a)') "SCALARS z_comp float"
+    write(IOVTK,'(a)') "LOOKUP_TABLE default"
+    do iproc=0, NPROCTOT-1
+      do i = 1,nglob
+        write(IOVTK,*) store_val_uz_all(i,iproc)
+      enddo
+    enddo
+    ! norm
+    write(IOVTK,'(a)') "SCALARS norm float"
+    write(IOVTK,'(a)') "LOOKUP_TABLE default"
+    do iproc=0, NPROCTOT-1
+      do i = 1,nglob
+        write(IOVTK,*) sqrt( store_val_ux_all(i,iproc)**2 &
+                          + store_val_uy_all(i,iproc)**2 &
+                          + store_val_uz_all(i,iproc)**2 )
+      enddo
+    enddo
+    write(IOVTK,*) ""
+
+    close(IOVTK)
+
+  endif
+
+  deallocate(store_val_x_all,store_val_y_all,store_val_z_all, &
+            store_val_ux_all,store_val_uy_all,store_val_uz_all, &
+            ibool_all,idoubling_all)
+
+  end subroutine write_VTK_data_cr_all

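For reference, the write_VTK_* routines added above all emit the same legacy ASCII VTK layout: a POINTS block, a CELLS block whose connectivity is zero-based (hence the "-1" applied to every ibool entry), and a CELL_TYPES block in which type 12 denotes a hexahedron. A minimal stand-alone sketch of that layout, not part of this commit (unit number, file name and corner coordinates are arbitrary):

  program vtk_single_hex_demo
  ! writes one unit-cube hexahedron in the legacy ASCII VTK format
  implicit none
  integer, parameter :: IOUT = 11
  real, dimension(8) :: x = (/0.,1.,1.,0.,0.,1.,1.,0./)
  real, dimension(8) :: y = (/0.,0.,1.,1.,0.,0.,1.,1./)
  real, dimension(8) :: z = (/0.,0.,0.,0.,1.,1.,1.,1./)
  integer :: i
  open(IOUT,file='single_hex.vtk',status='unknown')
  write(IOUT,'(a)') '# vtk DataFile Version 3.1'
  write(IOUT,'(a)') 'single hexahedron example'
  write(IOUT,'(a)') 'ASCII'
  write(IOUT,'(a)') 'DATASET UNSTRUCTURED_GRID'
  write(IOUT,'(a,i12,a)') 'POINTS ', 8, ' float'
  do i = 1,8
    write(IOUT,'(3e18.6)') x(i),y(i),z(i)
  enddo
  ! one cell: the corner count (8) followed by zero-based point indices,
  ! hence the cell-list length of 9 announced in the CELLS header
  write(IOUT,'(a,i12,i12)') 'CELLS ', 1, 9
  write(IOUT,'(9i12)') 8, 0,1,2,3,4,5,6,7
  write(IOUT,'(a,i12)') 'CELL_TYPES ', 1
  write(IOUT,*) 12   ! VTK_HEXAHEDRON
  close(IOUT)
  end program vtk_single_hex_demo

Loading the resulting single_hex.vtk in ParaView displays a single unit cube, which is a quick way to sanity-check the corner ordering used by the routines above.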
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in	2012-04-16 21:29:35 UTC (rev 19948)
@@ -91,7 +91,6 @@
 libspecfem_a_OBJECTS_SOLVER = \
 	$O/assemble_MPI_scalar.mpicheckno.o \
 	$O/assemble_MPI_vector.mpicheckno.o \
-	$O/assemble_MPI_scalar_block.mpicheckno.o \
 	$O/auto_ner.shared.o \
 	$O/broadcast_compute_parameters.sharedmpi.o \
 	$O/calendar.shared.o \
@@ -100,7 +99,6 @@
 	$O/compute_adj_source_frechet.check.o \
 	$O/compute_arrays_source.check.o \
 	$O/convert_time.check.o \
-	$O/create_central_cube_buffers.mpicheck.o \
 	$O/create_name_database.shared.o \
 	$O/count_elements.shared.o \
 	$O/count_number_of_sources.shared.o \
@@ -141,6 +139,7 @@
 	$O/write_seismograms.mpicheck.o \
 	$O/write_output_ASCII.mpicheck.o \
 	$O/write_output_SAC.mpicheck.o \
+	$O/write_VTK_file.sharedmpi.o \
 	$(EMPTY_MACRO)
 
 # solver objects with statically allocated arrays; dependent upon
@@ -148,8 +147,6 @@
 
 SOLVER_ARRAY_OBJECTS = \
 	$O/specfem3D_par.solver.o \
-	$O/assemble_MPI_central_cube_block.mpisolver.o \
-	$O/assemble_MPI_central_cube.mpisolver.o \
 	$O/check_simulation_stability.mpisolver.o \
 	$O/compute_add_sources.solver.o \
 	$O/compute_boundary_kernel.solvercheck.o \
@@ -167,7 +164,6 @@
 	$O/compute_seismograms.solver.o \
 	$O/compute_stacey_crust_mantle.solver.o \
 	$O/compute_stacey_outer_core.solver.o \
-	$O/fix_non_blocking_flags.mpisolvercheck.o \
 	$O/finalize_simulation.mpisolver.o \
 	$O/initialize_simulation.mpisolver.o \
 	$O/iterate_time.mpisolver.o \

Deleted: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -1,353 +0,0 @@
-!=====================================================================
-!
-!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
-!          --------------------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!                        Princeton University, USA
-!             and University of Pau / CNRS / INRIA, France
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-  subroutine assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
-                                      npoin2D_cube_from_slices, &
-                                      buffer_all_cube_from_slices,buffer_slices, &
-                                      request_send_cc,request_receive_cc, &
-                                      request_send_array_cc,request_receive_array_cc, &
-                                      ibool_central_cube, &
-                                      receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
-                                      ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE, &
-                                      vector_assemble,ndim_assemble, &
-                                      iphase_comm_CC)
-
-  implicit none
-
-  ! standard include of the MPI library
-  include 'mpif.h'
-  include 'constants.h'
-
-  ! include values created by the mesher
-  include "OUTPUT_FILES/values_from_mesher.h"
-
-  ! for matching with central cube in inner core
-  integer, intent(in) :: ichunk, nb_msgs_theor_in_cube, npoin2D_cube_from_slices
-  integer, intent(in) :: ndim_assemble
-  integer, intent(in) :: receiver_cube_from_slices
-  integer, intent(inout) :: iphase_comm_CC
-  integer, dimension(nb_msgs_theor_in_cube), intent(in) :: sender_from_slices_to_cube
-
-  double precision, dimension(npoin2D_cube_from_slices,ndim_assemble), intent(inout) :: &
-    buffer_slices
-  double precision, dimension(npoin2D_cube_from_slices,ndim_assemble,nb_msgs_theor_in_cube), intent(inout) :: &
-    buffer_all_cube_from_slices
-
-  ! note: these parameters are "saved" now as global parameters
-  ! MPI status of messages to be received
-  integer, intent(inout) :: request_send_cc,request_receive_cc
-  ! maximum value of nb_msgs_theor_in_cube is 5 (when NPROC_XI == 1)
-  ! therefore NPROC_XI+4 is always large enough
-  integer, dimension(NPROC_XI_VAL+4), intent(inout) :: request_send_array_cc,request_receive_array_cc
-
-  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices), intent(in) :: ibool_central_cube
-
-  ! local to global mapping
-  integer, intent(in) :: NSPEC2D_BOTTOM_INNER_CORE
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE), intent(in) :: ibool_inner_core
-  integer, dimension(NSPEC_INNER_CORE), intent(in) :: idoubling_inner_core
-  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE), intent(in) :: ibelm_bottom_inner_core
-
-  ! vector
-  real(kind=CUSTOM_REAL), dimension(ndim_assemble,NGLOB_INNER_CORE), intent(inout) :: vector_assemble
-
-! local parameters
-
-  integer ipoin,idimension, ispec2D, ispec
-  integer i,j,k
-  integer sender,receiver,imsg
-
-  real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: array_central_cube
-
-  logical :: flag_result_test
-  integer, dimension(MPI_STATUS_SIZE) :: msg_status
-  integer :: ier
-
-  ! mask
-  logical, dimension(NGLOB_INNER_CORE) :: mask
-
-  !---
-  !---  use buffers to assemble mass matrix with central cube once and for all
-  !---
-
-  select case( iphase_comm_CC )
-
-  case( 1 )
-
-    ! non-central-cube chunks send values to receiver central cube chunks AB or AB_ANTIPODE
-
-    ! note: only chunks AB and AB_ANTIPODE contain valid central cube elements,
-    !          all other have only fictitious central cube elements
-
-    ! on chunks AB and AB_ANTIPODE, receive all the messages from slices
-    if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-      do imsg = 1,nb_msgs_theor_in_cube-1
-        ! receive buffers from slices
-        sender = sender_from_slices_to_cube(imsg)
-        call MPI_IRECV(buffer_all_cube_from_slices(1,1,imsg), &
-                  ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
-                  itag,MPI_COMM_WORLD,request_receive_array_cc(imsg),ier)
-      enddo
-    endif
-
-    ! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
-    if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-      ! for bottom elements in contact with central cube from the slices side
-      ipoin = 0
-      do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
-        ispec = ibelm_bottom_inner_core(ispec2D)
-        ! only for DOFs exactly on surface of central cube (bottom of these elements)
-        k = 1
-        do j = 1,NGLLY
-          do i = 1,NGLLX
-            ipoin = ipoin + 1
-            buffer_slices(ipoin,:) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
-          enddo
-        enddo
-      enddo
-      ! send buffer to central cube
-      receiver = receiver_cube_from_slices
-      call MPI_ISEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
-                MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,request_send_cc,ier)
-    endif  ! end sending info to central cube
-
-    iphase_comm_CC = iphase_comm_CC + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  case( 2 )
-
-    ! central cube chunks AB and AB_ANTIPODE send values to each other
-
-    ! checks that chunks have sent out messages
-    if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-      call MPI_TEST(request_send_cc,flag_result_test,msg_status,ier)
-      if(.not. flag_result_test) return ! exit if message not sent yet
-    endif
-
-    ! checks that central cube chunks have received all (requested) messages
-    if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-      do imsg = 1,nb_msgs_theor_in_cube-1
-        call MPI_TEST(request_receive_array_cc(imsg),flag_result_test,msg_status,ier)
-        if(.not. flag_result_test) return ! exit if message not received yet
-      enddo
-    endif
-
-    ! exchange of their bottom faces between chunks AB and AB_ANTIPODE
-    if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-      ipoin = 0
-      do ispec = NSPEC_INNER_CORE, 1, -1
-        if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
-          k = 1
-          do j = 1,NGLLY
-            do i = 1,NGLLX
-              ipoin = ipoin + 1
-              buffer_slices(ipoin,:) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
-            enddo
-          enddo
-        endif
-      enddo
-      sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
-    !   call MPI_SENDRECV(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
-    !       itag,buffer_slices2,ndim_assemble*npoin2D_cube_from_slices,&
-    !       MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-
-      call MPI_IRECV(buffer_all_cube_from_slices(1,1,nb_msgs_theor_in_cube), &
-          ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,request_receive_cc,ier)
-    !! DK DK this merged with previous statement
-    !   buffer_all_cube_from_slices(:,:,nb_msgs_theor_in_cube) = buffer_slices2(:,:)
-
-      call MPI_ISEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
-          itag,MPI_COMM_WORLD,request_send_cc,ier)
-    endif
-
-    iphase_comm_CC = iphase_comm_CC + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  case( 3 )
-
-    !--- now we need to assemble the contributions
-
-    ! central cube chunks AB and AB_ANTIPODE assemble values and send them out to others
-
-    if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-
-      ! checks that messages between AB and AB_ANTIPODE have been sent and received
-      call MPI_TEST(request_send_cc,flag_result_test,msg_status,ier)
-      if(.not. flag_result_test) return ! exit if message not sent yet
-      call MPI_TEST(request_receive_cc,flag_result_test,msg_status,ier)
-      if(.not. flag_result_test) return ! exit if message not received yet
-
-      do idimension = 1,ndim_assemble
-        ! erase contributions to central cube array
-        array_central_cube(:) = 0._CUSTOM_REAL
-
-        ! use indirect addressing to store contributions only once
-        ! distinguish between single and double precision for reals
-        do imsg = 1,nb_msgs_theor_in_cube-1
-          do ipoin = 1,npoin2D_cube_from_slices
-            if(CUSTOM_REAL == SIZE_REAL) then
-              array_central_cube(ibool_central_cube(imsg,ipoin)) = sngl(buffer_all_cube_from_slices(ipoin,idimension,imsg))
-            else
-              array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(ipoin,idimension,imsg)
-            endif
-          enddo
-        enddo
-        ! add the constribution of AB or AB_ANTIPODE to sum with the external slices on the edges
-        ! use a mask to avoid taking the same point into account several times.
-        mask(:) = .false.
-        do ipoin = 1,npoin2D_cube_from_slices
-          if (.not. mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin))) then
-            if(CUSTOM_REAL == SIZE_REAL) then
-              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
-              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
-              sngl(buffer_all_cube_from_slices(ipoin,idimension,nb_msgs_theor_in_cube))
-            else
-              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
-              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
-              buffer_all_cube_from_slices(ipoin,idimension,nb_msgs_theor_in_cube)
-            endif
-            mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = .true.
-          endif
-        enddo
-
-        ! suppress degrees of freedom already assembled at top of cube on edges
-        do ispec = 1,NSPEC_INNER_CORE
-          if(idoubling_inner_core(ispec) == IFLAG_TOP_CENTRAL_CUBE) then
-            k = NGLLZ
-            do j = 1,NGLLY
-              do i = 1,NGLLX
-                array_central_cube(ibool_inner_core(i,j,k,ispec)) = 0._CUSTOM_REAL
-              enddo
-            enddo
-          endif
-        enddo
-
-        ! assemble contributions
-        vector_assemble(idimension,:) = vector_assemble(idimension,:) + array_central_cube(:)
-
-        ! copy sum back
-        do imsg = 1,nb_msgs_theor_in_cube-1
-          do ipoin = 1,npoin2D_cube_from_slices
-            buffer_all_cube_from_slices(ipoin,idimension,imsg) = vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
-          enddo
-        enddo
-
-      enddo
-
-    endif
-
-    !----------
-
-    ! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
-    if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-      ! receive buffers from slices
-      sender = receiver_cube_from_slices
-      call MPI_IRECV(buffer_slices, &
-                  ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
-                  itag,MPI_COMM_WORLD,request_receive_cc,ier)
-      ! for bottom elements in contact with central cube from the slices side
-      !   ipoin = 0
-      !   do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
-      !     ispec = ibelm_bottom_inner_core(ispec2D)
-      ! only for DOFs exactly on surface of central cube (bottom of these elements)
-      !     k = 1
-      !     do j = 1,NGLLY
-      !       do i = 1,NGLLX
-      !         ipoin = ipoin + 1
-      ! distinguish between single and double precision for reals
-      !         if(CUSTOM_REAL == SIZE_REAL) then
-      !           vector_assemble(:,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,:))
-      !         else
-      !           vector_assemble(:,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,:)
-      !         endif
-      !       enddo
-      !     enddo
-      !   enddo
-    endif  ! end receiving info from central cube
-
-    !------- send info back from central cube to slices
-
-    ! on chunk AB & CHUNK_AB_ANTIPODE, send all the messages to slices
-    if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-      do imsg = 1,nb_msgs_theor_in_cube-1
-        ! send buffers to slices
-        receiver = sender_from_slices_to_cube(imsg)
-        call MPI_ISEND(buffer_all_cube_from_slices(1,1,imsg),ndim_assemble*npoin2D_cube_from_slices, &
-                MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,request_send_array_cc(imsg),ier)
-      enddo
-    endif
-
-    iphase_comm_CC = iphase_comm_CC + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  case( 4 )
-
-    ! all non-central cube chunks set the values at the common points with central cube
-
-    ! checks that messages were sent out by central cube chunks AB and AB_ANTIPODE
-    if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-      do imsg = 1,nb_msgs_theor_in_cube-1
-        call MPI_TEST(request_send_array_cc(imsg),flag_result_test,msg_status,ier)
-        if(.not. flag_result_test) return ! exit if message not sent yet
-      enddo
-    endif
-
-    ! checks that messages have been received
-    if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-      call MPI_TEST(request_receive_cc,flag_result_test,msg_status,ier)
-      if(.not. flag_result_test) return ! exit if message not received yet
-    endif
-
-    ! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
-    if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-      ! for bottom elements in contact with central cube from the slices side
-      ipoin = 0
-      do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
-        ispec = ibelm_bottom_inner_core(ispec2D)
-        ! only for DOFs exactly on surface of central cube (bottom of these elements)
-        k = 1
-        do j = 1,NGLLY
-          do i = 1,NGLLX
-            ipoin = ipoin + 1
-            ! distinguish between single and double precision for reals
-            if(CUSTOM_REAL == SIZE_REAL) then
-              vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,:))
-            else
-              vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,:)
-            endif
-          enddo
-        enddo
-      enddo
-    endif  ! end receiving info from central cube
-
-    ! this is the exit condition, to go beyond the last phase number
-    iphase_comm_CC = iphase_comm_CC + 1
-
-  end select
-
-  end subroutine assemble_MPI_central_cube
-

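The routine deleted above drives its communications through the iphase_comm_CC counter: each call posts non-blocking sends/receives (or tests them with MPI_TEST) and returns immediately, so the solver can overlap computation with communication and simply call again later to advance to the next phase. A minimal stand-alone sketch of that pattern, not part of this commit and assuming exactly two MPI processes:

  program phased_nonblocking_demo
  ! phase 1 starts the exchange and returns; phase 2 advances only once
  ! MPI_TEST reports that both requests have completed
  implicit none
  include 'mpif.h'
  integer, parameter :: N = 4
  double precision, dimension(N) :: sendbuf, recvbuf
  integer :: myrank, partner, ier, iphase
  integer :: request_send, request_receive
  integer, dimension(MPI_STATUS_SIZE) :: msg_status
  logical :: flag_send, flag_recv
  call MPI_INIT(ier)
  call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ier)
  partner = 1 - myrank          ! the only other rank
  sendbuf(:) = dble(myrank + 1)
  iphase = 1
  do while (iphase <= 2)
    select case (iphase)
    case (1)
      call MPI_IRECV(recvbuf,N,MPI_DOUBLE_PRECISION,partner,0,MPI_COMM_WORLD,request_receive,ier)
      call MPI_ISEND(sendbuf,N,MPI_DOUBLE_PRECISION,partner,0,MPI_COMM_WORLD,request_send,ier)
      iphase = iphase + 1
    case (2)
      call MPI_TEST(request_send,flag_send,msg_status,ier)
      call MPI_TEST(request_receive,flag_recv,msg_status,ier)
      if (flag_send .and. flag_recv) then
        sendbuf(:) = sendbuf(:) + recvbuf(:)   ! assemble the received contribution
        iphase = iphase + 1
      endif
      ! a real solver would return here and do useful work instead of spinning
    end select
  enddo
  call MPI_FINALIZE(ier)
  end program phased_nonblocking_demo

The deleted routine cycles through four such phases (slices to cube, cube to cube, assembly and send back, copy back) and handles many partner slices before the assembly is complete.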
Deleted: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -1,327 +0,0 @@
-!=====================================================================
-!
-!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
-!          --------------------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!                        Princeton University, USA
-!             and University of Pau / CNRS / INRIA, France
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-subroutine assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
-                                          npoin2D_cube_from_slices, &
-                                          buffer_all_cube_from_slices, buffer_slices, buffer_slices2, &
-                                          ibool_central_cube, &
-                                          receiver_cube_from_slices, ibool_inner_core, &
-                                          idoubling_inner_core, NSPEC_INNER_CORE, &
-                                          ibelm_bottom_inner_core, NSPEC2D_BOTTOM_INNER_CORE,NGLOB_INNER_CORE, &
-                                          vector_assemble,ndim_assemble, &
-                                          iproc_eta,addressing,NCHUNKS,NPROC_XI,NPROC_ETA)
-
-! this version of the routine is based on blocking MPI calls
-
-  implicit none
-
-! standard include of the MPI library
-  include 'mpif.h'
-  include 'constants.h'
-
-! for matching with central cube in inner core
-  integer ichunk, nb_msgs_theor_in_cube, npoin2D_cube_from_slices
-  integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
-  double precision, dimension(npoin2D_cube_from_slices,NDIM) :: &
-    buffer_slices,buffer_slices2
-  double precision, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM) :: &
-    buffer_all_cube_from_slices
-  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
-  integer receiver_cube_from_slices
-
-! local to global mapping
-  integer NSPEC_INNER_CORE,NSPEC2D_BOTTOM_INNER_CORE, NGLOB_INNER_CORE
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
-  integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
-  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
-
-! vector
-  integer ndim_assemble
-  real(kind=CUSTOM_REAL), dimension(ndim_assemble,NGLOB_INNER_CORE) :: vector_assemble
-
-!daniel: for addressing of the slices
-  integer, intent(in) :: NCHUNKS,NPROC_XI,NPROC_ETA
-  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1), intent(in) :: addressing
-  integer, intent(in) :: iproc_eta
-
-  integer ipoin,idimension, ispec2D, ispec
-  integer i,j,k
-  integer sender,receiver,imsg
-
-  real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: array_central_cube
-
-! MPI status of messages to be received
-  integer msg_status(MPI_STATUS_SIZE), ier
-
-! mask
-  logical, dimension(NGLOB_INNER_CORE) :: mask
-
-!---
-!---  now use buffers to assemble mass matrix with central cube once and for all
-!---
-
-! on chunks AB and AB_ANTIPODE, receive all the messages from slices
-  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-
-    do imsg = 1,nb_msgs_theor_in_cube-1
-
-! receive buffers from slices
-    sender = sender_from_slices_to_cube(imsg)
-    call MPI_RECV(buffer_slices, &
-                ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
-                itag,MPI_COMM_WORLD,msg_status,ier)
-
-! copy buffer in 2D array for each slice
-    buffer_all_cube_from_slices(imsg,:,1:ndim_assemble) = buffer_slices(:,1:ndim_assemble)
-
-    enddo
-  endif
-
-! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
-  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-
-! for bottom elements in contact with central cube from the slices side
-    ipoin = 0
-    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
-
-      ispec = ibelm_bottom_inner_core(ispec2D)
-
-! only for DOFs exactly on surface of central cube (bottom of these elements)
-      k = 1
-      do j = 1,NGLLY
-        do i = 1,NGLLX
-          ipoin = ipoin + 1
-          buffer_slices(ipoin,1:ndim_assemble) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
-        enddo
-      enddo
-    enddo
-
-! send buffer to central cube
-    receiver = receiver_cube_from_slices
-    call MPI_SEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
-                 MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
-
-    ! daniel: in case NPROC_XI == 1, the other chunks exchange all bottom points with
-    ! CHUNK_AB **and** CHUNK_AB_ANTIPODE
-    if(NPROC_XI==1) then
-      call MPI_SEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
-                   MPI_DOUBLE_PRECISION, &
-                   addressing(CHUNK_AB_ANTIPODE,0,iproc_eta), &
-                   itag,MPI_COMM_WORLD,ier)
-    endif
-
-
- endif  ! end sending info to central cube
-
-
-! exchange of their bottom faces between chunks AB and AB_ANTIPODE
-  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-
-    ipoin = 0
-    do ispec = NSPEC_INNER_CORE, 1, -1
-      if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
-        k = 1
-        do j = 1,NGLLY
-          do i = 1,NGLLX
-            ipoin = ipoin + 1
-            buffer_slices(ipoin,1:ndim_assemble) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
-          enddo
-        enddo
-      endif
-    enddo
-
-    sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
-
-    call MPI_SENDRECV(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
-        itag,buffer_slices2,ndim_assemble*npoin2D_cube_from_slices,&
-        MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-
-   buffer_all_cube_from_slices(nb_msgs_theor_in_cube,:,1:ndim_assemble) = buffer_slices2(:,1:ndim_assemble)
-
-  endif
-
-!--- now we need to assemble the contributions
-
-  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-
-    do idimension = 1,ndim_assemble
-! erase contributions to central cube array
-      array_central_cube(:) = 0._CUSTOM_REAL
-
-! use indirect addressing to store contributions only once
-! distinguish between single and double precision for reals
-      do imsg = 1,nb_msgs_theor_in_cube-1
-        do ipoin = 1,npoin2D_cube_from_slices
-!daniel: debug
-          if(NPROC_XI==1) then
-            if(ibool_central_cube(imsg,ipoin) > 0 ) then
-              array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(imsg,ipoin,idimension)
-            endif
-          else
-            if(CUSTOM_REAL == SIZE_REAL) then
-              array_central_cube(ibool_central_cube(imsg,ipoin)) = sngl(buffer_all_cube_from_slices(imsg,ipoin,idimension))
-            else
-              array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(imsg,ipoin,idimension)
-            endif
-          endif
-        enddo
-      enddo
-! add the constribution of AB or AB_ANTIPODE to sum with the external slices on the edges
-! use a mask to avoid taking the same point into account several times.
-      mask(:) = .false.
-      do ipoin = 1,npoin2D_cube_from_slices
-!daniel:debug
-        if(NPROC_XI==1) then
-          if( ibool_central_cube(nb_msgs_theor_in_cube,ipoin) > 0 ) then
-            if (.not. mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin))) then
-              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
-              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
-              buffer_all_cube_from_slices(nb_msgs_theor_in_cube,ipoin,idimension)
-            endif
-            mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = .true.
-          endif
-        else
-          if (.not. mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin))) then
-            if(CUSTOM_REAL == SIZE_REAL) then
-              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
-              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
-              sngl(buffer_all_cube_from_slices(nb_msgs_theor_in_cube,ipoin,idimension))
-            else
-              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
-              array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
-              buffer_all_cube_from_slices(nb_msgs_theor_in_cube,ipoin,idimension)
-            endif
-          endif
-          mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = .true.
-        endif
-      enddo
-
-! suppress degrees of freedom already assembled at top of cube on edges
-      do ispec = 1,NSPEC_INNER_CORE
-        if(idoubling_inner_core(ispec) == IFLAG_TOP_CENTRAL_CUBE) then
-          k = NGLLZ
-          do j = 1,NGLLY
-            do i = 1,NGLLX
-              array_central_cube(ibool_inner_core(i,j,k,ispec)) = 0._CUSTOM_REAL
-            enddo
-          enddo
-        endif
-      enddo
-
-! assemble contributions
-      vector_assemble(idimension,:) = vector_assemble(idimension,:) + array_central_cube(:)
-
-! copy sum back
-      do imsg = 1,nb_msgs_theor_in_cube-1
-        do ipoin = 1,npoin2D_cube_from_slices
-!daniel:debug
-          if(NPROC_XI==1) then
-            if( ibool_central_cube(imsg,ipoin) > 0 ) then
-              buffer_all_cube_from_slices(imsg,ipoin,idimension) = &
-                      vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
-            else
-              buffer_all_cube_from_slices(imsg,ipoin,idimension) = 0._CUSTOM_REAL
-            endif
-          else
-            buffer_all_cube_from_slices(imsg,ipoin,idimension) = &
-                    vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
-          endif
-        enddo
-      enddo
-
-    enddo
-
-  endif
-
-!----------
-
-! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
-  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-
-! receive buffers from slices
-    sender = receiver_cube_from_slices
-    call MPI_RECV(buffer_slices, &
-                ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
-                itag,MPI_COMM_WORLD,msg_status,ier)
-
-    ! daniel: in case NPROC_XI == 1, the other chunks exchange all bottom points with
-    ! CHUNK_AB **and** CHUNK_AB_ANTIPODE
-    if(NPROC_XI==1) then
-      call MPI_RECV(buffer_slices2, &
-                  ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION, &
-                  addressing(CHUNK_AB_ANTIPODE,0,iproc_eta), &
-                  itag,MPI_COMM_WORLD,msg_status,ier)
-
-      buffer_slices = buffer_slices + buffer_slices2
-    endif
-
-
-! for bottom elements in contact with central cube from the slices side
-    ipoin = 0
-    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
-
-      ispec = ibelm_bottom_inner_core(ispec2D)
-
-! only for DOFs exactly on surface of central cube (bottom of these elements)
-      k = 1
-      do j = 1,NGLLY
-        do i = 1,NGLLX
-          ipoin = ipoin + 1
-
-! distinguish between single and double precision for reals
-          if(CUSTOM_REAL == SIZE_REAL) then
-            vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,1:ndim_assemble))
-          else
-            vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,1:ndim_assemble)
-          endif
-
-        enddo
-      enddo
-    enddo
-
- endif  ! end receiving info from central cube
-
-!------- send info back from central cube to slices
-
-! on chunk AB & CHUNK_AB_ANTIPODE, send all the messages to slices
-  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-
-   do imsg = 1,nb_msgs_theor_in_cube-1
-
-! copy buffer in 2D array for each slice
-   buffer_slices(:,1:ndim_assemble) = buffer_all_cube_from_slices(imsg,:,1:ndim_assemble)
-
-! send buffers to slices
-    receiver = sender_from_slices_to_cube(imsg)
-    call MPI_SEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
-              MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
-
-   enddo
-   endif
-
-end subroutine assemble_MPI_central_cube_block
-

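By contrast, the variant deleted above is built on blocking MPI calls: each exchange completes before the routine moves on, and the swap of bottom faces between the two central-cube chunks reduces to a single MPI_SENDRECV per rank. A minimal stand-alone sketch, not part of this commit and again assuming exactly two MPI processes:

  program sendrecv_swap_demo
  ! blocking combined send/receive: no request handles, no later MPI_TEST
  implicit none
  include 'mpif.h'
  integer, parameter :: N = 4
  double precision, dimension(N) :: buffer_out, buffer_in
  integer :: myrank, partner, ier
  integer, dimension(MPI_STATUS_SIZE) :: msg_status
  call MPI_INIT(ier)
  call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ier)
  partner = 1 - myrank
  buffer_out(:) = dble(myrank + 1)
  call MPI_SENDRECV(buffer_out,N,MPI_DOUBLE_PRECISION,partner,0, &
                    buffer_in, N,MPI_DOUBLE_PRECISION,partner,0, &
                    MPI_COMM_WORLD,msg_status,ier)
  buffer_out(:) = buffer_out(:) + buffer_in(:)   ! assemble the partner's contribution
  call MPI_FINALIZE(ier)
  end program sendrecv_swap_demo

This is simpler but serializes communication and computation, which is presumably why the phased non-blocking version existed alongside it.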
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -29,566 +29,107 @@
 !---- assemble the contributions between slices and chunks using MPI
 !----
 
-  subroutine assemble_MPI_scalar(myrank,array_val,nglob, &
-            iproc_xi,iproc_eta,ichunk,addressing, &
-            iboolleft_xi,iboolright_xi,iboolleft_eta,iboolright_eta, &
-            npoin2D_faces,npoin2D_xi,npoin2D_eta, &
-            iboolfaces,iboolcorner, &
-            iprocfrom_faces,iprocto_faces, &
-            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-            buffer_send_faces_scalar,buffer_received_faces_scalar, &
-            npoin2D_max_all_CM_IC, &
-            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-            request_send,request_receive, &
-            request_send_array,request_receive_array, &
-            NUMMSGS_FACES,NCORNERSCHUNKS, &
-            NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL, &
-            NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NCHUNKS, &
-            iphase_comm)
 
-  implicit none
+  subroutine assemble_MPI_scalar_ext_mesh(NPROC,NGLOB_AB,array_val, &
+                        num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
+                        nibool_interfaces_ext_mesh,ibool_interfaces_ext_mesh, &
+                        my_neighbours_ext_mesh)
 
-! standard include of the MPI library
-  include 'mpif.h'
+! blocking send/receive
 
+  implicit none
+
   include "constants.h"
-  include "precision.h"
 
-  integer myrank,nglob,NCHUNKS,iphase_comm
+  integer :: NPROC
+  integer :: NGLOB_AB
 
-! array to assemble
-  real(kind=CUSTOM_REAL), dimension(nglob), intent(inout) :: array_val
+  ! array to assemble
+  real(kind=CUSTOM_REAL), dimension(NGLOB_AB) :: array_val
 
-  integer, intent(in) :: iproc_xi,iproc_eta,ichunk
-  integer, dimension(NB_SQUARE_EDGES_ONEDIR), intent(in) :: npoin2D_xi,npoin2D_eta
-  integer, intent(in) :: npoin2D_faces(NUMFACES_SHARED)
+  integer :: num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh
+  integer, dimension(num_interfaces_ext_mesh) :: nibool_interfaces_ext_mesh,my_neighbours_ext_mesh
+  integer, dimension(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh) :: ibool_interfaces_ext_mesh
 
-  integer, intent(in) :: NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY
-  integer, intent(in) :: NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL
-  integer, intent(in) :: NUMMSGS_FACES,NCORNERSCHUNKS
+  ! local parameters
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_ext_mesh
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_recv_scalar_ext_mesh
+  integer, dimension(:), allocatable :: request_send_scalar_ext_mesh
+  integer, dimension(:), allocatable :: request_recv_scalar_ext_mesh
 
-! for addressing of the slices
-  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1), intent(in) :: addressing
 
-! 2-D addressing and buffers for summation between slices
-  integer, dimension(NGLOB2DMAX_XMIN_XMAX), intent(in) :: iboolleft_xi,iboolright_xi
-  integer, dimension(NGLOB2DMAX_YMIN_YMAX), intent(in) :: iboolleft_eta,iboolright_eta
+  integer ipoin,iinterface,ier
 
-! indirect addressing for each corner of the chunks
-  integer, dimension(NGLOB1D_RADIAL,NUMCORNERS_SHARED), intent(in) :: iboolcorner
+! here we have to assemble all the contributions between partitions using MPI
 
-  integer, intent(in) :: npoin2D_max_all_CM_IC
-  integer, dimension(NGLOB2DMAX_XY,NUMFACES_SHARED), intent(in) :: iboolfaces
-  real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC,NUMFACES_SHARED), intent(inout) :: &
-    buffer_send_faces_scalar, &
-    buffer_received_faces_scalar
+  ! assemble only if more than one partition
+  if(NPROC > 1) then
 
-  ! buffers for send and receive between corners of the chunks
-  real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL), intent(inout) :: buffer_send_chunkcorn_scalar, &
-                                                                      buffer_recv_chunkcorn_scalar
+    allocate(buffer_send_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array buffer_send_scalar_ext_mesh'
+    allocate(buffer_recv_scalar_ext_mesh(max_nibool_interfaces_ext_mesh,num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array buffer_recv_scalar_ext_mesh'
+    allocate(request_send_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array request_send_scalar_ext_mesh'
+    allocate(request_recv_scalar_ext_mesh(num_interfaces_ext_mesh),stat=ier)
+    if( ier /= 0 ) stop 'error allocating array request_recv_scalar_ext_mesh'
 
-  ! ---- arrays to assemble between chunks
-
-  ! communication pattern for faces between chunks
-  integer, dimension(NUMMSGS_FACES), intent(in) :: iprocfrom_faces,iprocto_faces
-
-  ! communication pattern for corners between chunks
-  integer, dimension(NCORNERSCHUNKS), intent(in) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
-  ! stored as global parameters
-  integer, intent(inout) :: request_send,request_receive
-  integer, dimension(NUMFACES_SHARED), intent(inout) :: request_send_array,request_receive_array
-
-! local parameters
-
-  ! MPI status of messages to be received
-  integer, dimension(MPI_STATUS_SIZE) :: msg_status
-
-  integer :: ipoin,ipoin2D,ipoin1D
-  integer :: sender,receiver
-  integer :: imsg
-  integer :: icount_faces,npoin2D_chunks
-  integer :: icount_corners
-
-  integer :: ier
-  logical :: flag_result_test
-
-
-! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
-
-! check flag to see if we need to assemble (might be turned off when debugging)
-  if (.not. ACTUALLY_ASSEMBLE_MPI_SLICES)  then
-    iphase_comm = 9999 ! this means everything is finished
-    return
-  endif
-
-! here we have to assemble all the contributions between slices using MPI
-
-!----
-!---- assemble the contributions between slices using MPI
-!----
-
-!----
-!---- first assemble along xi using the 2-D topology
-!----
-
-  if(iphase_comm == 1) then
-
-    ! slices copy the right face into the buffer
-    do ipoin=1,npoin2D_xi(2)
-      buffer_send_faces_scalar(ipoin,1) = array_val(iboolright_xi(ipoin))
-    enddo
-
-    ! send messages forward along each row
-    if(iproc_xi == 0) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
-    endif
-    if(iproc_xi == NPROC_XI-1) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
-    endif
-    call MPI_IRECV(buffer_received_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,sender, &
-          itag,MPI_COMM_WORLD,request_receive,ier)
-
-    call MPI_ISEND(buffer_send_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,receiver, &
-          itag2,MPI_COMM_WORLD,request_send,ier)
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  endif !!!!!!!!! end of iphase_comm 1
-
-  if(iphase_comm == 2) then
-
-    ! call MPI_WAIT(request_send,msg_status,ier)
-    ! call MPI_WAIT(request_receive,msg_status,ier)
-    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not sent yet
-    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not received yet
-
-    ! all slices add the buffer received to the contributions on the left face
-    if(iproc_xi > 0) then
-      do ipoin=1,npoin2D_xi(1)
-        array_val(iboolleft_xi(ipoin)) = array_val(iboolleft_xi(ipoin)) + &
-                                buffer_received_faces_scalar(ipoin,1)
+    ! partition border copy into the buffer
+    do iinterface = 1, num_interfaces_ext_mesh
+      do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+        buffer_send_scalar_ext_mesh(ipoin,iinterface) = array_val(ibool_interfaces_ext_mesh(ipoin,iinterface))
       enddo
-    endif
-
-    ! the contributions are correctly assembled on the left side of each slice
-    ! now we have to send the result back to the sender
-    ! all slices copy the left face into the buffer
-    do ipoin=1,npoin2D_xi(1)
-      buffer_send_faces_scalar(ipoin,1) = array_val(iboolleft_xi(ipoin))
     enddo
 
-    ! send messages backward along each row
-    if(iproc_xi == NPROC_XI-1) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
-    endif
-    if(iproc_xi == 0) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
-    endif
-    call MPI_IRECV(buffer_received_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,sender, &
-          itag,MPI_COMM_WORLD,request_receive,ier)
-
-    call MPI_ISEND(buffer_send_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,receiver, &
-          itag2,MPI_COMM_WORLD,request_send,ier)
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  endif !!!!!!!!! end of iphase_comm 2
-
-  if(iphase_comm == 3) then
-
-    ! call MPI_WAIT(request_send,msg_status,ier)
-    ! call MPI_WAIT(request_receive,msg_status,ier)
-    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not sent yet
-    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not received yet
-
-    ! all slices copy the buffer received to the contributions on the right face
-    if(iproc_xi < NPROC_XI-1) then
-      do ipoin=1,npoin2D_xi(2)
-        array_val(iboolright_xi(ipoin)) = buffer_received_faces_scalar(ipoin,1)
-      enddo
-    endif
-
-    !----
-    !---- then assemble along eta using the 2-D topology
-    !----
-
-    ! slices copy the right face into the buffer
-    do ipoin=1,npoin2D_eta(2)
-      buffer_send_faces_scalar(ipoin,1) = array_val(iboolright_eta(ipoin))
+    ! send messages
+    do iinterface = 1, num_interfaces_ext_mesh
+      ! non-blocking synchronous send request
+      call isend_cr(buffer_send_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+           nibool_interfaces_ext_mesh(iinterface), &
+           my_neighbours_ext_mesh(iinterface), &
+           itag, &
+           request_send_scalar_ext_mesh(iinterface) &
+           )
+      ! receive request
+      call irecv_cr(buffer_recv_scalar_ext_mesh(1:nibool_interfaces_ext_mesh(iinterface),iinterface), &
+           nibool_interfaces_ext_mesh(iinterface), &
+           my_neighbours_ext_mesh(iinterface), &
+           itag, &
+           request_recv_scalar_ext_mesh(iinterface) &
+           )
     enddo
 
-    ! send messages forward along each row
-    if(iproc_eta == 0) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
-    endif
-    if(iproc_eta == NPROC_ETA-1) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
-    endif
-    call MPI_IRECV(buffer_received_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,sender, &
-      itag,MPI_COMM_WORLD,request_receive,ier)
-
-    call MPI_ISEND(buffer_send_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,receiver, &
-      itag2,MPI_COMM_WORLD,request_send,ier)
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  endif !!!!!!!!! end of iphase_comm 3
-
-  if(iphase_comm == 4) then
-
-    ! call MPI_WAIT(request_send,msg_status,ier)
-    ! call MPI_WAIT(request_receive,msg_status,ier)
-    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not sent yet
-    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not received yet
-
-    ! all slices add the buffer received to the contributions on the left face
-    if(iproc_eta > 0) then
-      do ipoin=1,npoin2D_eta(1)
-        array_val(iboolleft_eta(ipoin)) = array_val(iboolleft_eta(ipoin)) + &
-                                buffer_received_faces_scalar(ipoin,1)
-      enddo
-    endif
-
-    ! the contributions are correctly assembled on the left side of each slice
-    ! now we have to send the result back to the sender
-    ! all slices copy the left face into the buffer
-    do ipoin=1,npoin2D_eta(1)
-      buffer_send_faces_scalar(ipoin,1) = array_val(iboolleft_eta(ipoin))
+    ! wait for communications completion (recv)
+    do iinterface = 1, num_interfaces_ext_mesh
+      call wait_req(request_recv_scalar_ext_mesh(iinterface))
     enddo
 
-    ! send messages backward along each row
-    if(iproc_eta == NPROC_ETA-1) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
-    endif
-    if(iproc_eta == 0) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
-    endif
-    call MPI_IRECV(buffer_received_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,sender, &
-      itag,MPI_COMM_WORLD,request_receive,ier)
-
-    call MPI_ISEND(buffer_send_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,receiver, &
-      itag2,MPI_COMM_WORLD,request_send,ier)
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  endif !!!!!!!!! end of iphase_comm 4
-
-  if(iphase_comm == 5) then
-
-    ! call MPI_WAIT(request_send,msg_status,ier)
-    ! call MPI_WAIT(request_receive,msg_status,ier)
-    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not sent yet
-    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not received yet
-
-    ! all slices copy the buffer received to the contributions on the right face
-    if(iproc_eta < NPROC_ETA-1) then
-      do ipoin=1,npoin2D_eta(2)
-        array_val(iboolright_eta(ipoin)) = buffer_received_faces_scalar(ipoin,1)
+    ! adding contributions of neighbours
+    do iinterface = 1, num_interfaces_ext_mesh
+      do ipoin = 1, nibool_interfaces_ext_mesh(iinterface)
+        array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) = &
+             array_val(ibool_interfaces_ext_mesh(ipoin,iinterface)) + buffer_recv_scalar_ext_mesh(ipoin,iinterface)
       enddo
-    endif
-
-    !----
-    !---- start MPI assembling phase between chunks
-    !----
-
-    ! check flag to see if we need to assemble (might be turned off when debugging)
-    ! and do not assemble if only one chunk
-    if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) then
-      iphase_comm = 9999 ! this means everything is finished
-      return
-    endif
-
-    ! ***************************************************************
-    !  transmit messages in forward direction (iprocfrom -> iprocto)
-    ! ***************************************************************
-
-    !---- put slices in receive mode
-    !---- a given slice can belong to at most two faces
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg)) then
-        sender = iprocfrom_faces(imsg)
-        npoin2D_chunks = npoin2D_faces(icount_faces)
-        call MPI_IRECV(buffer_received_faces_scalar(1,icount_faces), &
-                      npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
-                      itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
-    !   do ipoin2D=1,npoin2D_chunks
-    !     array_val(iboolfaces(ipoin2D,icount_faces)) = &
-    !        array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D)
-    !   enddo
-      endif
     enddo
 
-    !---- put slices in send mode
-    !---- a given slice can belong to at most two faces
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg)) then
-        receiver = iprocto_faces(imsg)
-        npoin2D_chunks = npoin2D_faces(icount_faces)
-        do ipoin2D=1,npoin2D_chunks
-          buffer_send_faces_scalar(ipoin2D,icount_faces) = array_val(iboolfaces(ipoin2D,icount_faces))
-        enddo
-        call MPI_ISEND(buffer_send_faces_scalar(1,icount_faces),npoin2D_chunks, &
-                      CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,request_send_array(icount_faces),ier)
-      endif
+    ! wait for communications completion (send)
+    do iinterface = 1, num_interfaces_ext_mesh
+      call wait_req(request_send_scalar_ext_mesh(iinterface))
     enddo
 
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
+    deallocate(buffer_send_scalar_ext_mesh)
+    deallocate(buffer_recv_scalar_ext_mesh)
+    deallocate(request_send_scalar_ext_mesh)
+    deallocate(request_recv_scalar_ext_mesh)
 
-  endif !!!!!!!!! end of iphase_comm 5
+  endif
 
-  if(iphase_comm == 6) then
+  end subroutine assemble_MPI_scalar_ext_mesh
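
A minimal self-contained sketch of the pattern used by assemble_MPI_scalar_ext_mesh above, written against raw MPI: pack the points shared with each neighbouring partition, post one non-blocking receive/send pair per interface, wait for the receives, add the neighbour contributions, then wait for the sends. It assumes the isend_cr/irecv_cr/wait_req wrappers map onto MPI_ISEND/MPI_IRECV/MPI_WAIT with single-precision reals; all names below are illustrative, not the library's.

  subroutine assemble_scalar_sketch(nglob,array_val,num_interfaces,max_nibool, &
                                    nibool_interfaces,ibool_interfaces,my_neighbours)

  use mpi
  implicit none

  integer, intent(in) :: nglob,num_interfaces,max_nibool
  integer, intent(in) :: nibool_interfaces(num_interfaces)
  integer, intent(in) :: ibool_interfaces(max_nibool,num_interfaces)
  integer, intent(in) :: my_neighbours(num_interfaces)
  real, intent(inout) :: array_val(nglob)

  real, allocatable :: buf_send(:,:),buf_recv(:,:)
  integer, allocatable :: req_send(:),req_recv(:)
  integer :: iinterface,ipoin,ier
  integer, parameter :: itag = 100

  allocate(buf_send(max_nibool,num_interfaces),buf_recv(max_nibool,num_interfaces))
  allocate(req_send(num_interfaces),req_recv(num_interfaces))

  ! 1) pack: copy the shared points of each interface into its send buffer
  do iinterface = 1,num_interfaces
    do ipoin = 1,nibool_interfaces(iinterface)
      buf_send(ipoin,iinterface) = array_val(ibool_interfaces(ipoin,iinterface))
    enddo
  enddo

  ! 2) post all non-blocking receives and sends
  do iinterface = 1,num_interfaces
    call MPI_IRECV(buf_recv(1,iinterface),nibool_interfaces(iinterface),MPI_REAL, &
                   my_neighbours(iinterface),itag,MPI_COMM_WORLD,req_recv(iinterface),ier)
    call MPI_ISEND(buf_send(1,iinterface),nibool_interfaces(iinterface),MPI_REAL, &
                   my_neighbours(iinterface),itag,MPI_COMM_WORLD,req_send(iinterface),ier)
  enddo

  ! 3) wait for each receive, then add the neighbour contributions
  do iinterface = 1,num_interfaces
    call MPI_WAIT(req_recv(iinterface),MPI_STATUS_IGNORE,ier)
    do ipoin = 1,nibool_interfaces(iinterface)
      array_val(ibool_interfaces(ipoin,iinterface)) = &
        array_val(ibool_interfaces(ipoin,iinterface)) + buf_recv(ipoin,iinterface)
    enddo
  enddo

  ! 4) wait for the sends before releasing the buffers
  do iinterface = 1,num_interfaces
    call MPI_WAIT(req_send(iinterface),MPI_STATUS_IGNORE,ier)
  enddo

  deallocate(buf_send,buf_recv,req_send,req_recv)

  end subroutine assemble_scalar_sketch
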
 
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg)) then
-        call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
-        if(.not. flag_result_test) return ! exit if message not received yet
-      endif
-    enddo
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg)) then
-        call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
-        if(.not. flag_result_test) return ! exit if message not sent yet
-      endif
-    enddo
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg)) then
-        do ipoin2D=1,npoin2D_faces(icount_faces)
-          array_val(iboolfaces(ipoin2D,icount_faces)) = &
-            array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D,icount_faces)
-        enddo
-      endif
-    enddo
-
-    ! *********************************************************************
-    !  transmit messages back in opposite direction (iprocto -> iprocfrom)
-    ! *********************************************************************
-
-    !---- put slices in receive mode
-    !---- a given slice can belong to at most two faces
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg)) then
-        sender = iprocto_faces(imsg)
-        npoin2D_chunks = npoin2D_faces(icount_faces)
-        call MPI_IRECV(buffer_received_faces_scalar(1,icount_faces), &
-                      npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
-                      itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
-      !   do ipoin2D=1,npoin2D_chunks
-      !     array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D)
-      !   enddo
-      endif
-    enddo
-
-    !---- put slices in send mode
-    !---- a given slice can belong to at most two faces
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg)) then
-        receiver = iprocfrom_faces(imsg)
-        npoin2D_chunks = npoin2D_faces(icount_faces)
-        do ipoin2D=1,npoin2D_chunks
-          buffer_send_faces_scalar(ipoin2D,icount_faces) = array_val(iboolfaces(ipoin2D,icount_faces))
-        enddo
-        call MPI_ISEND(buffer_send_faces_scalar(1,icount_faces),npoin2D_chunks, &
-                      CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,request_send_array(icount_faces),ier)
-      endif
-    enddo
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  endif !!!!!!!!! end of iphase_comm 6
-
-  if(iphase_comm == 7) then
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg)) then
-        call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
-        if(.not. flag_result_test) return ! exit if message not sent yet
-      endif
-    enddo
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg)) then
-        call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
-        if(.not. flag_result_test) return ! exit if message not received yet
-      endif
-    enddo
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg)) then
-        do ipoin2D=1,npoin2D_faces(icount_faces)
-          array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D,icount_faces)
-        enddo
-      endif
-    enddo
-
-    ! this is the exit condition, to go beyond the last phase number
-    iphase_comm = iphase_comm + 1
-
-    !! DK DK do the rest in blocking for now, for simplicity
-
-    !----
-    !---- start MPI assembling corners
-    !----
-
-    ! scheme for corners cannot deadlock even if NPROC_XI = NPROC_ETA = 1
-
-    ! ***************************************************************
-    !  transmit messages in forward direction (two workers -> master)
-    ! ***************************************************************
-
-    icount_corners = 0
-
-    do imsg = 1,NCORNERSCHUNKS
-
-      if(myrank == iproc_master_corners(imsg) .or. &
-         myrank == iproc_worker1_corners(imsg) .or. &
-         (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
-
-      !---- receive messages from the two workers on the master
-      if(myrank==iproc_master_corners(imsg)) then
-
-        ! receive from worker #1 and add to local array
-        sender = iproc_worker1_corners(imsg)
-        call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
-              CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-        do ipoin1D=1,NGLOB1D_RADIAL
-          array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
-                   buffer_recv_chunkcorn_scalar(ipoin1D)
-        enddo
-
-        ! receive from worker #2 and add to local array
-        if(NCHUNKS /= 2) then
-          sender = iproc_worker2_corners(imsg)
-          call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
-                      CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-          do ipoin1D=1,NGLOB1D_RADIAL
-            array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
-                   buffer_recv_chunkcorn_scalar(ipoin1D)
-          enddo
-        endif
-
-      endif
-
-      !---- send messages from the two workers to the master
-      if(myrank==iproc_worker1_corners(imsg) .or. &
-                  (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
-
-        receiver = iproc_master_corners(imsg)
-        do ipoin1D=1,NGLOB1D_RADIAL
-          buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
-        enddo
-        call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
-                  receiver,itag,MPI_COMM_WORLD,ier)
-
-      endif
-
-      ! *********************************************************************
-      !  transmit messages back in opposite direction (master -> two workers)
-      ! *********************************************************************
-
-      !---- receive messages from the master on the two workers
-      if(myrank==iproc_worker1_corners(imsg) .or. &
-                  (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
-
-      ! receive from master and copy to local array
-        sender = iproc_master_corners(imsg)
-        call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
-              CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-        do ipoin1D=1,NGLOB1D_RADIAL
-          array_val(iboolcorner(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_scalar(ipoin1D)
-        enddo
-
-      endif
-
-      !---- send messages from the master to the two workers
-      if(myrank==iproc_master_corners(imsg)) then
-
-        do ipoin1D=1,NGLOB1D_RADIAL
-          buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
-        enddo
-
-        ! send to worker #1
-        receiver = iproc_worker1_corners(imsg)
-        call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
-                  receiver,itag,MPI_COMM_WORLD,ier)
-
-        ! send to worker #2
-        if(NCHUNKS /= 2) then
-          receiver = iproc_worker2_corners(imsg)
-          call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
-                      receiver,itag,MPI_COMM_WORLD,ier)
-        endif
-
-      endif
-
-    enddo
-
-  endif !!!!!!!!! end of iphase_comm 7
-
-  end subroutine assemble_MPI_scalar
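
The phased routine removed above advances a small state machine: each call either posts non-blocking sends/receives for the current phase or polls the outstanding requests with MPI_TEST, returning immediately if anything is still in flight so the caller can keep computing, and it only increments iphase_comm (with 9999 meaning "finished") once a phase has completed. A minimal two-phase sketch of that polling pattern, with illustrative names only:

  subroutine exchange_phased(iphase,val,neighbour,request_send,request_recv, &
                             buf_send,buf_recv)

  use mpi
  implicit none

  integer, intent(inout) :: iphase,request_send,request_recv
  integer, intent(in) :: neighbour
  real, intent(inout) :: val,buf_send,buf_recv
  logical :: done
  integer :: ier
  integer, parameter :: itag = 42

  select case (iphase)
  case (1)
    ! post the non-blocking exchange, then return right away
    buf_send = val
    call MPI_IRECV(buf_recv,1,MPI_REAL,neighbour,itag,MPI_COMM_WORLD,request_recv,ier)
    call MPI_ISEND(buf_send,1,MPI_REAL,neighbour,itag,MPI_COMM_WORLD,request_send,ier)
    iphase = iphase + 1

  case (2)
    ! poll; if either message is still in flight, return and let the caller
    ! do useful work before calling again
    call MPI_TEST(request_send,done,MPI_STATUS_IGNORE,ier)
    if (.not. done) return
    call MPI_TEST(request_recv,done,MPI_STATUS_IGNORE,ier)
    if (.not. done) return
    val = val + buf_recv
    iphase = 9999  ! everything is finished
  end select

  end subroutine exchange_phased
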
-
-
 !
 !-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
 !
 
-! daniel: TODO - still local versions
 
 
   subroutine assemble_MPI_scalar_ext_mesh_s(NPROC,NGLOB_AB,array_val, &
@@ -656,7 +197,7 @@
   end subroutine assemble_MPI_scalar_ext_mesh_s
 
 !
-!----
+!-------------------------------------------------------------------------------------------------
 !
 
   subroutine assemble_MPI_scalar_ext_mesh_w(NPROC,NGLOB_AB,array_val, &
@@ -713,14 +254,10 @@
 
   end subroutine assemble_MPI_scalar_ext_mesh_w
 
-
-
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-!daniel: TODO - cuda scalar assembly...
-
   subroutine assemble_MPI_scalar_send_cuda(NPROC, &
                                           buffer_send_scalar_ext_mesh,buffer_recv_scalar_ext_mesh, &
                                           num_interfaces_ext_mesh,max_nibool_interfaces_ext_mesh, &
@@ -734,7 +271,7 @@
   ! sends data
   ! note: assembling data already filled into buffer_send_scalar_ext_mesh array
 
-  use constants
+  use constants_solver
   use specfem_par,only: Mesh_pointer
 
   implicit none
@@ -797,7 +334,7 @@
 
 ! waits for send/receiver to be completed and assembles contributions
 
-  use constants
+  use constants_solver
   use specfem_par,only: Mesh_pointer
 
   implicit none

Deleted: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -1,439 +0,0 @@
-!=====================================================================
-!
-!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
-!          --------------------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!                        Princeton University, USA
-!             and University of Pau / CNRS / INRIA, France
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!----
-!---- assemble the contributions between slices and chunks using MPI
-!----
-
-  subroutine assemble_MPI_scalar_block(myrank,array_val,nglob, &
-            iproc_xi,iproc_eta,ichunk,addressing, &
-            iboolleft_xi,iboolright_xi,iboolleft_eta,iboolright_eta, &
-            npoin2D_faces,npoin2D_xi,npoin2D_eta, &
-            iboolfaces,iboolcorner, &
-            iprocfrom_faces,iprocto_faces,imsg_type, &
-            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
-            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
-            NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL, &
-            NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NCHUNKS)
-
-! this version of the routine is based on blocking MPI calls
-
-  implicit none
-
-! standard include of the MPI library
-  include 'mpif.h'
-
-  include "constants.h"
-  include "precision.h"
-
-  integer myrank,nglob,NCHUNKS
-
-! array to assemble
-  real(kind=CUSTOM_REAL), dimension(nglob) :: array_val
-
-  integer iproc_xi,iproc_eta,ichunk
-  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi,npoin2D_eta
-  integer npoin2D_faces(NUMFACES_SHARED)
-
-  integer NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY
-  integer NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL
-  integer NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS
-
-! for addressing of the slices
-  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1) :: addressing
-
-! 2-D addressing and buffers for summation between slices
-  integer, dimension(NGLOB2DMAX_XMIN_XMAX) :: iboolleft_xi,iboolright_xi
-  integer, dimension(NGLOB2DMAX_YMIN_YMAX) :: iboolleft_eta,iboolright_eta
-
-! indirect addressing for each corner of the chunks
-  integer, dimension(NGLOB1D_RADIAL,NUMCORNERS_SHARED) :: iboolcorner
-  integer icount_corners
-
-  integer :: npoin2D_max_all_CM_IC
-  integer, dimension(NGLOB2DMAX_XY,NUMFACES_SHARED) :: iboolfaces
-  real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC) :: buffer_send_faces_scalar,buffer_received_faces_scalar
-
-! buffers for send and receive between corners of the chunks
-  real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL) :: buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar
-
-! ---- arrays to assemble between chunks
-
-! communication pattern for faces between chunks
-  integer, dimension(NUMMSGS_FACES) :: iprocfrom_faces,iprocto_faces,imsg_type
-
-! communication pattern for corners between chunks
-  integer, dimension(NCORNERSCHUNKS) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
-! MPI status of messages to be received
-  integer msg_status(MPI_STATUS_SIZE)
-
-  integer ipoin,ipoin2D,ipoin1D
-  integer sender,receiver,ier
-  integer imsg,imsg_loop
-  integer icount_faces,npoin2D_chunks
-
-! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
-
-! check flag to see if we need to assemble (might be turned off when debugging)
-  if (.not. ACTUALLY_ASSEMBLE_MPI_SLICES) return
-
-! here we have to assemble all the contributions between slices using MPI
-
-!----
-!---- assemble the contributions between slices using MPI
-!----
-
-!----
-!---- first assemble along xi using the 2-D topology
-!----
-
-! assemble along xi only if more than one slice
-  if(NPROC_XI > 1) then
-
-    ! slices copy the right face into the buffer
-    do ipoin=1,npoin2D_xi(2)
-      buffer_send_faces_scalar(ipoin) = array_val(iboolright_xi(ipoin))
-    enddo
-
-    ! send messages forward along each row
-    if(iproc_xi == 0) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
-    endif
-    if(iproc_xi == NPROC_XI-1) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
-    endif
-    call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,receiver, &
-          itag2,buffer_received_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,sender, &
-          itag,MPI_COMM_WORLD,msg_status,ier)
-
-    ! all slices add the buffer received to the contributions on the left face
-    if(iproc_xi > 0) then
-      do ipoin=1,npoin2D_xi(1)
-        array_val(iboolleft_xi(ipoin)) = array_val(iboolleft_xi(ipoin)) + &
-                                buffer_received_faces_scalar(ipoin)
-      enddo
-    endif
-
-    ! the contributions are correctly assembled on the left side of each slice
-    ! now we have to send the result back to the sender
-    ! all slices copy the left face into the buffer
-    do ipoin=1,npoin2D_xi(1)
-      buffer_send_faces_scalar(ipoin) = array_val(iboolleft_xi(ipoin))
-    enddo
-
-    ! send messages backward along each row
-    if(iproc_xi == NPROC_XI-1) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
-    endif
-    if(iproc_xi == 0) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
-    endif
-    call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,receiver, &
-          itag2,buffer_received_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,sender, &
-          itag,MPI_COMM_WORLD,msg_status,ier)
-
-    ! all slices copy the buffer received to the contributions on the right face
-    if(iproc_xi < NPROC_XI-1) then
-      do ipoin=1,npoin2D_xi(2)
-        array_val(iboolright_xi(ipoin)) = buffer_received_faces_scalar(ipoin)
-      enddo
-    endif
-
-  endif
-
-!----
-!---- then assemble along eta using the 2-D topology
-!----
-
-! assemble along eta only if more than one slice
-  if(NPROC_ETA > 1) then
-
-    ! slices copy the right face into the buffer
-    do ipoin=1,npoin2D_eta(2)
-      buffer_send_faces_scalar(ipoin) = array_val(iboolright_eta(ipoin))
-    enddo
-
-    ! send messages forward along each row
-    if(iproc_eta == 0) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
-    endif
-    if(iproc_eta == NPROC_ETA-1) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
-    endif
-    call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,receiver, &
-      itag2,buffer_received_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,sender, &
-      itag,MPI_COMM_WORLD,msg_status,ier)
-
-    ! all slices add the buffer received to the contributions on the left face
-    if(iproc_eta > 0) then
-      do ipoin=1,npoin2D_eta(1)
-        array_val(iboolleft_eta(ipoin)) = array_val(iboolleft_eta(ipoin)) + &
-                                buffer_received_faces_scalar(ipoin)
-      enddo
-    endif
-
-    ! the contributions are correctly assembled on the left side of each slice
-    ! now we have to send the result back to the sender
-    ! all slices copy the left face into the buffer
-    do ipoin=1,npoin2D_eta(1)
-      buffer_send_faces_scalar(ipoin) = array_val(iboolleft_eta(ipoin))
-    enddo
-
-    ! send messages backward along each row
-    if(iproc_eta == NPROC_ETA-1) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
-    endif
-    if(iproc_eta == 0) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
-    endif
-    call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,receiver, &
-      itag2,buffer_received_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,sender, &
-      itag,MPI_COMM_WORLD,msg_status,ier)
-
-    ! all slices copy the buffer received to the contributions on the right face
-    if(iproc_eta < NPROC_ETA-1) then
-      do ipoin=1,npoin2D_eta(2)
-        array_val(iboolright_eta(ipoin)) = buffer_received_faces_scalar(ipoin)
-      enddo
-    endif
-
-  endif
-
-!----
-!---- start MPI assembling phase between chunks
-!----
-
-! check flag to see if we need to assemble (might be turned off when debugging)
-! and do not assemble if only one chunk
-  if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) return
-
-! ***************************************************************
-!  transmit messages in forward direction (iprocfrom -> iprocto)
-! ***************************************************************
-
-!---- put slices in receive mode
-!---- a given slice can belong to at most two faces
-
-! use three step scheme that can never deadlock
-! scheme for faces cannot deadlock even if NPROC_XI = NPROC_ETA = 1
-  do imsg_loop = 1,NUM_MSG_TYPES
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. &
-           myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
-        sender = iprocfrom_faces(imsg)
-        npoin2D_chunks = npoin2D_faces(icount_faces)
-        call MPI_RECV(buffer_received_faces_scalar, &
-                  npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
-                  itag,MPI_COMM_WORLD,msg_status,ier)
-        do ipoin2D=1,npoin2D_chunks
-          array_val(iboolfaces(ipoin2D,icount_faces)) = &
-             array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D)
-        enddo
-      endif
-    enddo
-
-    !---- put slices in send mode
-    !---- a given slice can belong to at most two faces
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. &
-           myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
-        receiver = iprocto_faces(imsg)
-        npoin2D_chunks = npoin2D_faces(icount_faces)
-        do ipoin2D=1,npoin2D_chunks
-          buffer_send_faces_scalar(ipoin2D) = array_val(iboolfaces(ipoin2D,icount_faces))
-        enddo
-        call MPI_SEND(buffer_send_faces_scalar,npoin2D_chunks, &
-                  CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
-      endif
-    enddo
-
-    ! *********************************************************************
-    !  transmit messages back in opposite direction (iprocto -> iprocfrom)
-    ! *********************************************************************
-
-    !---- put slices in receive mode
-    !---- a given slice can belong to at most two faces
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. &
-           myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
-        sender = iprocto_faces(imsg)
-        npoin2D_chunks = npoin2D_faces(icount_faces)
-        call MPI_RECV(buffer_received_faces_scalar, &
-                  npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
-                  itag,MPI_COMM_WORLD,msg_status,ier)
-        do ipoin2D=1,npoin2D_chunks
-          array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D)
-        enddo
-      endif
-    enddo
-
-    !---- put slices in send mode
-    !---- a given slice can belong to at most two faces
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. &
-           myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
-        receiver = iprocfrom_faces(imsg)
-        npoin2D_chunks = npoin2D_faces(icount_faces)
-        do ipoin2D=1,npoin2D_chunks
-          buffer_send_faces_scalar(ipoin2D) = array_val(iboolfaces(ipoin2D,icount_faces))
-        enddo
-        call MPI_SEND(buffer_send_faces_scalar,npoin2D_chunks, &
-                  CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
-      endif
-    enddo
-
-! end of anti-deadlocking loop
-  enddo
-
-!----
-!---- start MPI assembling corners
-!----
-
-! scheme for corners cannot deadlock even if NPROC_XI = NPROC_ETA = 1
-
-! ***************************************************************
-!  transmit messages in forward direction (two workers -> master)
-! ***************************************************************
-
-  icount_corners = 0
-
-  do imsg = 1,NCORNERSCHUNKS
-
-    if(myrank == iproc_master_corners(imsg) .or. &
-       myrank == iproc_worker1_corners(imsg) .or. &
-       (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
-
-    !---- receive messages from the two workers on the master
-    if(myrank==iproc_master_corners(imsg)) then
-
-      ! receive from worker #1 and add to local array
-      sender = iproc_worker1_corners(imsg)
-      call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
-            CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-      do ipoin1D=1,NGLOB1D_RADIAL
-        array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
-                 buffer_recv_chunkcorn_scalar(ipoin1D)
-      enddo
-
-      ! receive from worker #2 and add to local array
-      if(NCHUNKS /= 2) then
-        sender = iproc_worker2_corners(imsg)
-        call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
-              CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-        do ipoin1D=1,NGLOB1D_RADIAL
-          array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
-                   buffer_recv_chunkcorn_scalar(ipoin1D)
-        enddo
-      endif
-
-    endif
-
-    !---- send messages from the two workers to the master
-    if(myrank==iproc_worker1_corners(imsg) .or. &
-                (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
-
-      receiver = iproc_master_corners(imsg)
-      do ipoin1D=1,NGLOB1D_RADIAL
-        buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
-      enddo
-      call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
-                receiver,itag,MPI_COMM_WORLD,ier)
-
-    endif
-
-    ! *********************************************************************
-    !  transmit messages back in opposite direction (master -> two workers)
-    ! *********************************************************************
-
-    !---- receive messages from the master on the two workers
-    if(myrank==iproc_worker1_corners(imsg) .or. &
-                (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
-
-      ! receive from master and copy to local array
-      sender = iproc_master_corners(imsg)
-      call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
-            CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-      do ipoin1D=1,NGLOB1D_RADIAL
-        array_val(iboolcorner(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_scalar(ipoin1D)
-      enddo
-
-    endif
-
-    !---- send messages from the master to the two workers
-    if(myrank==iproc_master_corners(imsg)) then
-
-      do ipoin1D=1,NGLOB1D_RADIAL
-        buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
-      enddo
-
-      ! send to worker #1
-      receiver = iproc_worker1_corners(imsg)
-      call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
-                receiver,itag,MPI_COMM_WORLD,ier)
-
-      ! send to worker #2
-      if(NCHUNKS /= 2) then
-        receiver = iproc_worker2_corners(imsg)
-        call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
-                  receiver,itag,MPI_COMM_WORLD,ier)
-      endif
-
-    endif
-
-  enddo
-
-  end subroutine assemble_MPI_scalar_block
-
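
The blocking file deleted above exchanges each face with a single MPI_SENDRECV and uses MPI_PROC_NULL at the ends of each processor row, so the edge slices need no special-casing: a send or receive addressed to MPI_PROC_NULL is simply a no-op. A minimal self-contained sketch of that idiom for a hypothetical 1-D row of ranks (not the solver's 2-D slice addressing):

  program sendrecv_row_sketch

  use mpi
  implicit none

  integer :: myrank,nproc,left,right,ier
  integer :: status(MPI_STATUS_SIZE)
  real :: val_send,val_recv

  call MPI_INIT(ier)
  call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ier)
  call MPI_COMM_SIZE(MPI_COMM_WORLD,nproc,ier)

  ! neighbours along the row; the two ends talk to MPI_PROC_NULL
  left  = merge(MPI_PROC_NULL,myrank - 1,myrank == 0)
  right = merge(MPI_PROC_NULL,myrank + 1,myrank == nproc - 1)

  ! pass a value forward along the row in one blocking call
  val_send = real(myrank)
  val_recv = 0.0
  call MPI_SENDRECV(val_send,1,MPI_REAL,right,0, &
                    val_recv,1,MPI_REAL,left,0, &
                    MPI_COMM_WORLD,status,ier)

  call MPI_FINALIZE(ier)

  end program sendrecv_row_sketch
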

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -27,835 +27,10 @@
 
 !----
 !---- assemble the contributions between slices and chunks using MPI
-!---- we handle two regions (crust/mantle and inner core) in the same MPI call
-!---- to reduce the total number of MPI calls
 !----
 
-  subroutine assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
-                iproc_xi,iproc_eta,ichunk,addressing, &
-                iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,&
-                iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
-                npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
-                iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
-                iboolleft_xi_inner_core,iboolright_xi_inner_core,&
-                iboolleft_eta_inner_core,iboolright_eta_inner_core, &
-                npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
-                iboolfaces_inner_core,iboolcorner_inner_core, &
-                iprocfrom_faces,iprocto_faces, &
-                iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-                buffer_send_faces_vector,buffer_received_faces_vector, &
-                npoin2D_max_all_CM_IC, &
-                buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
-                request_send,request_receive, &
-                request_send_array,request_receive_array, &
-                NUMMSGS_FACES,NCORNERSCHUNKS, &
-                NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL_crust_mantle, &
-                NGLOB1D_RADIAL_inner_core,NCHUNKS,iphase_comm)
+! non-blocking routines
 
-  implicit none
-
-  ! standard include of the MPI library
-  include 'mpif.h'
-
-  include "constants.h"
-  include "precision.h"
-
-  ! include values created by the mesher
-  include "OUTPUT_FILES/values_from_mesher.h"
-
-  integer myrank,NCHUNKS,iphase_comm
-
-  ! the two arrays to assemble
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE), intent(inout) :: accel_crust_mantle
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE), intent(inout) :: accel_inner_core
-
-  integer, intent(in) :: iproc_xi,iproc_eta,ichunk
-  integer, intent(in) :: npoin2D_faces_crust_mantle(NUMFACES_SHARED)
-  integer, intent(in) :: npoin2D_faces_inner_core(NUMFACES_SHARED)
-
-  integer, dimension(NB_SQUARE_EDGES_ONEDIR), intent(in) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
-        npoin2D_xi_inner_core,npoin2D_eta_inner_core
-
-  integer, intent(in) :: NGLOB1D_RADIAL_crust_mantle,NGLOB1D_RADIAL_inner_core,NPROC_XI,NPROC_ETA
-  integer, intent(in) :: NUMMSGS_FACES,NCORNERSCHUNKS
-
-  ! for addressing of the slices
-  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1), intent(in) :: addressing
-
-  ! 2-D addressing and buffers for summation between slices
-  integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM), intent(in) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
-  integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM), intent(in) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
-  integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC), intent(in) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
-  integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC), intent(in) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
-
-  ! indirect addressing for each corner of the chunks
-  integer, dimension(NGLOB1D_RADIAL_crust_mantle,NUMCORNERS_SHARED), intent(in) :: iboolcorner_crust_mantle
-  integer, dimension(NGLOB1D_RADIAL_inner_core,NUMCORNERS_SHARED), intent(in) :: iboolcorner_inner_core
-
-  integer, intent(in) :: npoin2D_max_all_CM_IC
-  integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED), intent(in) :: iboolfaces_crust_mantle
-  integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED), intent(in) :: iboolfaces_inner_core
-  real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED), intent(inout) :: &
-      buffer_send_faces_vector,buffer_received_faces_vector
-
-  ! buffers for send and receive between corners of the chunks
-  ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_crust_mantle + NGLOB1D_RADIAL_inner_core), intent(inout) :: &
-    buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector
-
-  ! stored as global parameters
-  integer, intent(inout) :: request_send,request_receive
-  integer, dimension(NUMFACES_SHARED), intent(inout) :: request_send_array,request_receive_array
-
-
-  ! ---- arrays to assemble between chunks
-  ! communication pattern for faces between chunks
-  integer, dimension(NUMMSGS_FACES), intent(in) :: iprocfrom_faces,iprocto_faces
-  ! communication pattern for corners between chunks
-  integer, dimension(NCORNERSCHUNKS), intent(in) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
-! local parameters
-
-  integer :: icount_corners
-
-  ! MPI status of messages to be received
-  integer, dimension(MPI_STATUS_SIZE) :: msg_status
-
-  integer :: ipoin,ipoin2D,ipoin1D
-  integer :: sender,receiver
-  integer :: imsg
-  integer :: icount_faces,npoin2D_chunks_all
-
-  integer :: NGLOB1D_RADIAL_all
-  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_all,npoin2D_eta_all
-
-  logical :: flag_result_test
-  integer :: ier
-
-  ! daniel: TODO - comment below might be obsolete..
-  ! do not remove the "save" statement because this routine is non blocking
-  ! therefore it needs to find the right value of ioffset when it re-enters
-  ! the routine later to perform the next communication step
-  integer, save :: ioffset
-
-
-  ! check flag to see if we need to assemble (might be turned off when debugging)
-  if (.not. ACTUALLY_ASSEMBLE_MPI_SLICES) then
-    iphase_comm = 9999 ! this means everything is finished
-    return
-  endif
-
-  ! here we have to assemble all the contributions between slices using MPI
-
-  ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
-  npoin2D_xi_all(:) = npoin2D_xi_crust_mantle(:) + npoin2D_xi_inner_core(:)
-  npoin2D_eta_all(:) = npoin2D_eta_crust_mantle(:) + npoin2D_eta_inner_core(:)
-
-  !----
-  !---- assemble the contributions between slices using MPI
-  !----
-
-  !----
-  !---- first assemble along xi using the 2-D topology
-  !----
-  select case( iphase_comm )
-
-  case( 1 )
-
-    ! slices send out values along xi sides (right face) forward along each row
-
-    ! slices copy the right face into the buffer
-    do ipoin = 1,npoin2D_xi_crust_mantle(2)
-      buffer_send_faces_vector(:,ipoin,1) = accel_crust_mantle(:,iboolright_xi_crust_mantle(ipoin))
-    enddo
-
-    ! the buffer for the inner core starts right after the buffer for the crust and mantle
-    ioffset = npoin2D_xi_crust_mantle(2)
-
-    do ipoin = 1,npoin2D_xi_inner_core(2)
-      buffer_send_faces_vector(:,ioffset + ipoin,1) = accel_inner_core(:,iboolright_xi_inner_core(ipoin))
-    enddo
-
-    ! send messages forward along each row
-    if(iproc_xi == 0) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
-    endif
-    if(iproc_xi == NPROC_XI-1) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
-    endif
-    ! requests to receive message
-    call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,sender, &
-          itag,MPI_COMM_WORLD,request_receive,ier)
-
-    ! sends out buffer
-    call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,receiver, &
-          itag2,MPI_COMM_WORLD,request_send,ier)
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
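
The ioffset bookkeeping in the phase above implements the "two regions, one message" idea stated in the removed routine header: crust/mantle values fill the front of the buffer and inner-core values start right after them, so each face exchange costs a single MPI call. A hypothetical standalone sketch of that packing (array names and sizes are illustrative):

  subroutine pack_two_regions(buf,npoin_cm,npoin_ic, &
                              accel_cm,ibool_cm,nglob_cm, &
                              accel_ic,ibool_ic,nglob_ic)

  implicit none

  integer, intent(in) :: npoin_cm,npoin_ic,nglob_cm,nglob_ic
  integer, intent(in) :: ibool_cm(npoin_cm),ibool_ic(npoin_ic)
  real, intent(in) :: accel_cm(3,nglob_cm),accel_ic(3,nglob_ic)
  real, intent(out) :: buf(3,npoin_cm + npoin_ic)

  integer :: ipoin,ioffset

  ! crust/mantle points fill the front of the buffer ...
  do ipoin = 1,npoin_cm
    buf(:,ipoin) = accel_cm(:,ibool_cm(ipoin))
  enddo

  ! ... and the inner-core points start right after them
  ioffset = npoin_cm
  do ipoin = 1,npoin_ic
    buf(:,ioffset + ipoin) = accel_ic(:,ibool_ic(ipoin))
  enddo

  end subroutine pack_two_regions
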
-
-  case( 2 )
-
-    ! slices assemble along (left face) xi sides
-    ! and send out values along xi sides (left face) backward along each row
-
-    ! checks if messages have been sent out and requested ones received
-    ! call MPI_WAIT(request_send,msg_status,ier)
-    ! call MPI_WAIT(request_receive,msg_status,ier)
-    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not sent yet
-    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not received yet
-
-    ! all slices add the buffer received to the contributions on the left face
-    if(iproc_xi > 0) then
-
-      do ipoin = 1,npoin2D_xi_crust_mantle(1)
-        accel_crust_mantle(:,iboolleft_xi_crust_mantle(ipoin)) = &
-                accel_crust_mantle(:,iboolleft_xi_crust_mantle(ipoin)) + &
-                                  buffer_received_faces_vector(:,ipoin,1)
-      enddo
-
-      ! the buffer for the inner core starts right after the buffer for the crust and mantle
-      ioffset = npoin2D_xi_crust_mantle(1)
-
-      do ipoin = 1,npoin2D_xi_inner_core(1)
-        accel_inner_core(:,iboolleft_xi_inner_core(ipoin)) = &
-                  accel_inner_core(:,iboolleft_xi_inner_core(ipoin)) + &
-                                  buffer_received_faces_vector(:,ioffset + ipoin,1)
-      enddo
-
-    endif
-
-    ! the contributions are correctly assembled on the left side of each slice
-    ! now we have to send the result back to the sender
-    ! all slices copy the left face into the buffer
-    do ipoin = 1,npoin2D_xi_crust_mantle(1)
-      buffer_send_faces_vector(:,ipoin,1) = accel_crust_mantle(:,iboolleft_xi_crust_mantle(ipoin))
-    enddo
-
-    ! the buffer for the inner core starts right after the buffer for the crust and mantle
-    ioffset = npoin2D_xi_crust_mantle(1)
-
-    do ipoin = 1,npoin2D_xi_inner_core(1)
-      buffer_send_faces_vector(:,ioffset + ipoin,1) = accel_inner_core(:,iboolleft_xi_inner_core(ipoin))
-    enddo
-
-    ! send messages backward along each row
-    if(iproc_xi == NPROC_XI-1) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
-    endif
-    if(iproc_xi == 0) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
-    endif
-    call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,sender, &
-          itag,MPI_COMM_WORLD,request_receive,ier)
-
-    call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,receiver, &
-          itag2,MPI_COMM_WORLD,request_send,ier)
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  case( 3 )
-
-    ! slices set (right face) xi sides
-    ! and send out values along eta sides (right face) forward along each column
-
-    ! checks if messages have been sent out and received
-    ! call MPI_WAIT(request_send,msg_status,ier)
-    ! call MPI_WAIT(request_receive,msg_status,ier)
-    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not sent yet
-    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not received yet
-
-    ! all slices copy the buffer received to the contributions on the right face
-    if(iproc_xi < NPROC_XI-1) then
-
-      do ipoin = 1,npoin2D_xi_crust_mantle(2)
-        accel_crust_mantle(:,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(:,ipoin,1)
-      enddo
-
-      ! the buffer for the inner core starts right after the buffer for the crust and mantle
-      ioffset = npoin2D_xi_crust_mantle(2)
-
-      do ipoin = 1,npoin2D_xi_inner_core(2)
-        accel_inner_core(:,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(:,ioffset + ipoin,1)
-      enddo
-
-    endif
-
-    !----
-    !---- then assemble along eta using the 2-D topology
-    !----
-
-    ! slices copy the right face into the buffer
-    do ipoin = 1,npoin2D_eta_crust_mantle(2)
-      buffer_send_faces_vector(:,ipoin,1) = accel_crust_mantle(:,iboolright_eta_crust_mantle(ipoin))
-    enddo
-
-    ! the buffer for the inner core starts right after the buffer for the crust and mantle
-    ioffset = npoin2D_eta_crust_mantle(2)
-
-    do ipoin = 1,npoin2D_eta_inner_core(2)
-      buffer_send_faces_vector(:,ioffset + ipoin,1) = accel_inner_core(:,iboolright_eta_inner_core(ipoin))
-    enddo
-
-    ! send messages forward along each row
-    if(iproc_eta == 0) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
-    endif
-    if(iproc_eta == NPROC_ETA-1) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
-    endif
-    call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,sender, &
-      itag,MPI_COMM_WORLD,request_receive,ier)
-
-    call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,receiver, &
-      itag2,MPI_COMM_WORLD,request_send,ier)
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
-
- case( 4 )
-
-    ! slices assemble along (left face) eta sides
-    ! and send out values along eta sides (left face) backward along each column
-
-    ! checks if messages have been sent out and received
-    ! call MPI_WAIT(request_send,msg_status,ier)
-    ! call MPI_WAIT(request_receive,msg_status,ier)
-    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not sent yet
-    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not received yet
-
-    ! all slices add the buffer received to the contributions on the left face
-    if(iproc_eta > 0) then
-
-      do ipoin = 1,npoin2D_eta_crust_mantle(1)
-        accel_crust_mantle(:,iboolleft_eta_crust_mantle(ipoin)) = &
-                  accel_crust_mantle(:,iboolleft_eta_crust_mantle(ipoin)) + &
-                                  buffer_received_faces_vector(:,ipoin,1)
-      enddo
-
-      ! the buffer for the inner core starts right after the buffer for the crust and mantle
-      ioffset = npoin2D_eta_crust_mantle(1)
-
-      do ipoin = 1,npoin2D_eta_inner_core(1)
-        accel_inner_core(:,iboolleft_eta_inner_core(ipoin)) = &
-                  accel_inner_core(:,iboolleft_eta_inner_core(ipoin)) + &
-                                  buffer_received_faces_vector(:,ioffset + ipoin,1)
-      enddo
-
-    endif
-
-    ! the contributions are correctly assembled on the left side of each slice
-    ! now we have to send the result back to the sender
-    ! all slices copy the left face into the buffer
-    do ipoin = 1,npoin2D_eta_crust_mantle(1)
-      buffer_send_faces_vector(:,ipoin,1) = accel_crust_mantle(:,iboolleft_eta_crust_mantle(ipoin))
-    enddo
-
-    ! the buffer for the inner core starts right after the buffer for the crust and mantle
-    ioffset = npoin2D_eta_crust_mantle(1)
-
-    do ipoin = 1,npoin2D_eta_inner_core(1)
-      buffer_send_faces_vector(:,ioffset + ipoin,1) = accel_inner_core(:,iboolleft_eta_inner_core(ipoin))
-    enddo
-
-    ! send messages backward along each row
-    if(iproc_eta == NPROC_ETA-1) then
-      sender = MPI_PROC_NULL
-    else
-      sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
-    endif
-    if(iproc_eta == 0) then
-      receiver = MPI_PROC_NULL
-    else
-      receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
-    endif
-    call MPI_IRECV(buffer_received_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,sender, &
-      itag,MPI_COMM_WORLD,request_receive,ier)
-
-    call MPI_ISEND(buffer_send_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,receiver, &
-      itag2,MPI_COMM_WORLD,request_send,ier)
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
-
-  case( 5 )
-
-    ! slices set (right face) eta sides
-    ! and send out values for neighbor chunks (iboolfaces)
-
-    ! checks if messages have been sent out and received
-    ! call MPI_WAIT(request_send,msg_status,ier)
-    ! call MPI_WAIT(request_receive,msg_status,ier)
-    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not sent yet
-    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
-    if(.not. flag_result_test) return ! exit if message not received yet
-
-    ! all slices copy the buffer received to the contributions on the right face
-    if(iproc_eta < NPROC_ETA-1) then
-
-      do ipoin = 1,npoin2D_eta_crust_mantle(2)
-        accel_crust_mantle(:,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(:,ipoin,1)
-      enddo
-
-      ! the buffer for the inner core starts right after the buffer for the crust and mantle
-      ioffset = npoin2D_eta_crust_mantle(2)
-
-      do ipoin = 1,npoin2D_eta_inner_core(2)
-        accel_inner_core(:,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(:,ioffset + ipoin,1)
-      enddo
-
-    endif
-
-    !----
-    !---- start MPI assembling phase between chunks
-    !----
-
-    ! check flag to see if we need to assemble (might be turned off when debugging)
-    ! and do not assemble if only one chunk
-    if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) then
-      iphase_comm = 9999 ! this means everything is finished
-      return
-    endif
-
-    ! ***************************************************************
-    !  transmit messages in forward direction (iprocfrom -> iprocto)
-    ! ***************************************************************
-
-    !---- put slices in receive mode
-    !---- a given slice can belong to at most two faces
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-
-      if(myrank==iprocto_faces(imsg)) then
-        sender = iprocfrom_faces(imsg)
-
-        ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
-        npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
-
-        call MPI_IRECV(buffer_received_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
-                itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
-
-        !   do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
-        !     accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-        !        accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
-        !           + buffer_received_faces_vector(1,ipoin2D,icount_faces)
-        !     accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-        !        accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
-        !           + buffer_received_faces_vector(2,ipoin2D,icount_faces)
-        !     accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-        !        accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
-        !           + buffer_received_faces_vector(3,ipoin2D,icount_faces)
-        !   enddo
-
-        ! the buffer for the inner core starts right after the buffer for the crust and mantle
-        !   ioffset = npoin2D_faces_crust_mantle(icount_faces)
-
-        !   do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
-        !     accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-        !        accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
-        !          buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
-        !     accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-        !        accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
-        !          buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
-        !     accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-        !        accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
-        !          buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
-        !   enddo
-
-      endif
-    enddo
-
-    !---- put slices in send mode
-    !---- a given slice can belong to at most two faces
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-
-      if(myrank==iprocfrom_faces(imsg)) then
-        receiver = iprocto_faces(imsg)
-
-        ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
-        npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
-
-        do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
-          buffer_send_faces_vector(:,ipoin2D,icount_faces) = &
-            accel_crust_mantle(:,iboolfaces_crust_mantle(ipoin2D,icount_faces))
-        enddo
-
-        ! the buffer for the inner core starts right after the buffer for the crust and mantle
-        ioffset = npoin2D_faces_crust_mantle(icount_faces)
-
-        do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
-          buffer_send_faces_vector(:,ioffset + ipoin2D,icount_faces) = &
-            accel_inner_core(:,iboolfaces_inner_core(ipoin2D,icount_faces))
-        enddo
-
-        call MPI_ISEND(buffer_send_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,receiver,itag, &
-                         MPI_COMM_WORLD,request_send_array(icount_faces),ier)
-      endif
-    enddo
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  case( 6 )
-
-    ! receiver slices on chunk faces assemble values (iboolfaces)
-    ! and send values back to senders
-
-    ! checks if messages have been received
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg)) then
-        call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
-        if(.not. flag_result_test) return ! exit if message not received yet
-      endif
-    enddo
-
-    ! checks if messages have been sent out
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg)) then
-        call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
-        if(.not. flag_result_test) return ! exit if message not sent yet
-      endif
-    enddo
-
-    ! assembles values on chunk faces
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg)) then
-
-        do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
-          accel_crust_mantle(:,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-             accel_crust_mantle(:,iboolfaces_crust_mantle(ipoin2D,icount_faces)) &
-             + buffer_received_faces_vector(:,ipoin2D,icount_faces)
-        enddo
-
-        ! the buffer for the inner core starts right after the buffer for the crust and mantle
-        ioffset = npoin2D_faces_crust_mantle(icount_faces)
-
-        do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
-          accel_inner_core(:,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-             accel_inner_core(:,iboolfaces_inner_core(ipoin2D,icount_faces)) + &
-               buffer_received_faces_vector(:,ioffset + ipoin2D,icount_faces)
-        enddo
-
-      endif
-    enddo
-
-    ! *********************************************************************
-    !  transmit messages back in opposite direction (iprocto -> iprocfrom)
-    ! *********************************************************************
-
-    !---- put slices in receive mode
-    !---- a given slice can belong to at most two faces
-
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg)) then
-        sender = iprocto_faces(imsg)
-
-        ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
-        npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
-
-        call MPI_IRECV(buffer_received_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
-                  itag,MPI_COMM_WORLD,request_receive_array(icount_faces),ier)
-
-        !   do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
-        !     accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-        !       buffer_received_faces_vector(1,ipoin2D,icount_faces)
-        !     accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-        !       buffer_received_faces_vector(2,ipoin2D,icount_faces)
-        !     accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-        !       buffer_received_faces_vector(3,ipoin2D,icount_faces)
-        !   enddo
-
-        ! the buffer for the inner core starts right after the buffer for the crust and mantle
-        !   ioffset = npoin2D_faces_crust_mantle(icount_faces)
-
-        !   do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
-        !     accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-        !       buffer_received_faces_vector(1,ioffset + ipoin2D,icount_faces)
-        !     accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-        !       buffer_received_faces_vector(2,ioffset + ipoin2D,icount_faces)
-        !     accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-        !       buffer_received_faces_vector(3,ioffset + ipoin2D,icount_faces)
-        !   enddo
-
-      endif
-    enddo
-
-    !---- put slices in send mode
-    !---- a given slice can belong to at most two faces
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg)) then
-        receiver = iprocfrom_faces(imsg)
-
-        ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
-        npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
-
-        do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
-          buffer_send_faces_vector(:,ipoin2D,icount_faces) = &
-            accel_crust_mantle(:,iboolfaces_crust_mantle(ipoin2D,icount_faces))
-        enddo
-
-        ! the buffer for the inner core starts right after the buffer for the crust and mantle
-        ioffset = npoin2D_faces_crust_mantle(icount_faces)
-
-        do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
-          buffer_send_faces_vector(:,ioffset + ipoin2D,icount_faces) = &
-            accel_inner_core(:,iboolfaces_inner_core(ipoin2D,icount_faces))
-        enddo
-
-        call MPI_ISEND(buffer_send_faces_vector(1,1,icount_faces),NDIM*npoin2D_chunks_all, &
-                        CUSTOM_MPI_TYPE,receiver,itag, &
-                         MPI_COMM_WORLD,request_send_array(icount_faces),ier)
-      endif
-    enddo
-
-    iphase_comm = iphase_comm + 1
-    return ! exit because we have started some communications therefore we need some time
-
-  case( 7 )
-
-    ! sender slices on chunk faces set values (iboolfaces)
-    ! and slices on corners assemble values on corners with other chunks
-
-    ! checks if messages have been sent
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocto_faces(imsg)) then
-        call MPI_TEST(request_send_array(icount_faces),flag_result_test,msg_status,ier)
-        if(.not. flag_result_test) return ! exit if message not sent yet
-      endif
-    enddo
-
-    ! checks if messages have been received
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg)) then
-        call MPI_TEST(request_receive_array(icount_faces),flag_result_test,msg_status,ier)
-        if(.not. flag_result_test) return ! exit if message not received yet
-      endif
-    enddo
-
-    ! sets values on faces (iboolfaces)
-    icount_faces = 0
-    do imsg = 1,NUMMSGS_FACES
-      if(myrank==iprocfrom_faces(imsg) .or. myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
-      if(myrank==iprocfrom_faces(imsg)) then
-        do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
-          accel_crust_mantle(:,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
-            buffer_received_faces_vector(:,ipoin2D,icount_faces)
-        enddo
-
-        ! the buffer for the inner core starts right after the buffer for the crust and mantle
-        ioffset = npoin2D_faces_crust_mantle(icount_faces)
-
-        do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
-          accel_inner_core(:,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
-            buffer_received_faces_vector(:,ioffset + ipoin2D,icount_faces)
-        enddo
-      endif
-    enddo
-
-    ! this is the exit condition, to go beyond the last phase number
-    iphase_comm = iphase_comm + 1
-
-    !! DK DK do the rest in blocking for now, for simplicity
-
-    !----
-    !---- start MPI assembling corners
-    !----
-
-    ! scheme for corners cannot deadlock even if NPROC_XI = NPROC_ETA = 1
-
-    ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
-    NGLOB1D_RADIAL_all = NGLOB1D_RADIAL_crust_mantle + NGLOB1D_RADIAL_inner_core
-
-    ! the buffer for the inner core starts right after the buffer for the crust and mantle
-    ioffset = NGLOB1D_RADIAL_crust_mantle
-
-    ! ***************************************************************
-    !  transmit messages in forward direction (two workers -> master)
-    ! ***************************************************************
-
-    icount_corners = 0
-
-    do imsg = 1,NCORNERSCHUNKS
-
-      if(myrank == iproc_master_corners(imsg) .or. &
-         myrank == iproc_worker1_corners(imsg) .or. &
-         (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
-
-      !---- receive messages from the two workers on the master
-      if(myrank==iproc_master_corners(imsg)) then
-
-        ! receive from worker #1 and add to local array
-        sender = iproc_worker1_corners(imsg)
-
-        call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
-              CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-
-        do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
-          accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
-                   accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
-                   buffer_recv_chunkcorn_vector(:,ipoin1D)
-        enddo
-
-        do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
-          accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
-                   accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
-                   buffer_recv_chunkcorn_vector(:,ioffset + ipoin1D)
-        enddo
-
-        ! receive from worker #2 and add to local array
-        if(NCHUNKS /= 2) then
-
-          sender = iproc_worker2_corners(imsg)
-
-          call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
-                CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-
-          do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
-            accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
-                     accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
-                     buffer_recv_chunkcorn_vector(:,ipoin1D)
-          enddo
-
-          do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
-            accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
-                     accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
-                     buffer_recv_chunkcorn_vector(:,ioffset + ipoin1D)
-          enddo
-
-        endif
-
-      endif
-
-      !---- send messages from the two workers to the master
-      if(myrank==iproc_worker1_corners(imsg) .or. &
-                  (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
-
-        receiver = iproc_master_corners(imsg)
-
-        do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
-          buffer_send_chunkcorn_vector(:,ipoin1D) = &
-            accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners))
-        enddo
-
-        do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
-          buffer_send_chunkcorn_vector(:,ioffset + ipoin1D) = &
-            accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners))
-        enddo
-
-        call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
-                      CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
-
-      endif
-
-      ! *********************************************************************
-      !  transmit messages back in opposite direction (master -> two workers)
-      ! *********************************************************************
-
-      !---- receive messages from the master on the two workers
-      if(myrank==iproc_worker1_corners(imsg) .or. &
-                  (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
-
-        ! receive from master and copy to local array
-        sender = iproc_master_corners(imsg)
-
-        call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
-              CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
-
-        do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
-          accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
-            buffer_recv_chunkcorn_vector(:,ipoin1D)
-        enddo
-
-        do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
-          accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
-            buffer_recv_chunkcorn_vector(:,ioffset + ipoin1D)
-        enddo
-
-      endif
-
-      !---- send messages from the master to the two workers
-      if(myrank==iproc_master_corners(imsg)) then
-
-        do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
-          buffer_send_chunkcorn_vector(:,ipoin1D) = &
-            accel_crust_mantle(:,iboolcorner_crust_mantle(ipoin1D,icount_corners))
-        enddo
-
-        do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
-          buffer_send_chunkcorn_vector(:,ioffset + ipoin1D) = &
-            accel_inner_core(:,iboolcorner_inner_core(ipoin1D,icount_corners))
-        enddo
-
-        ! send to worker #1
-        receiver = iproc_worker1_corners(imsg)
-        call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all,&
-                    CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
-
-        ! send to worker #2
-        if(NCHUNKS /= 2) then
-          receiver = iproc_worker2_corners(imsg)
-          call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all,&
-                      CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
-        endif
-
-      endif
-
-    enddo
-
-  end select
-
-  end subroutine assemble_MPI_vector
-
-!
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!
-!
-
-!daniel: todo - still local versions
-
   subroutine assemble_MPI_vector_ext_mesh_s(NPROC,NGLOB_AB, &
                                            array_val, &
                                            buffer_send_vector_ext_mesh,buffer_recv_vector_ext_mesh, &
@@ -864,7 +39,7 @@
                                            my_neighbours_ext_mesh, &
                                            request_send_vector_ext_mesh,request_recv_vector_ext_mesh)
 
-  ! sends data
+! sends data
 
   implicit none
 
@@ -924,7 +99,6 @@
 !-------------------------------------------------------------------------------------------------
 !
 
-!daniel: TODO - cuda scalar assembly...
 
 ! interrupt might improve MPI performance
 ! see: https://computing.llnl.gov/tutorials/mpi_performance/#Sender-ReceiverSync
@@ -942,7 +116,7 @@
 
   ! sends data
   ! note: array to assemble already filled into buffer_send_vector_ext_mesh array
-  use constants
+  use constants_solver
   use specfem_par,only: Mesh_pointer
 
   implicit none
@@ -1066,7 +240,8 @@
                                             IREGION,FORWARD_OR_ADJOINT )
 
 ! waits for data to receive and assembles
-  use constants
+
+  use constants_solver
   use specfem_par,only: Mesh_pointer
 
   implicit none
@@ -1118,72 +293,3 @@
 
   end subroutine assemble_MPI_vector_write_cuda
 
-!
-!----
-!
-
-  subroutine irecv_cr(recvbuf, recvcount, dest, recvtag, req)
-
-  implicit none
-
-! standard include of the MPI library
-  include 'mpif.h'
-
-  include "constants.h"
-  include "precision.h"
-
-  integer recvcount, dest, recvtag, req
-  real(kind=CUSTOM_REAL), dimension(recvcount) :: recvbuf
-
-  integer ier
-
-  call MPI_IRECV(recvbuf(1),recvcount,CUSTOM_MPI_TYPE,dest,recvtag, &
-                  MPI_COMM_WORLD,req,ier)
-
-  end subroutine irecv_cr
-
-!
-!----
-!
-
-  subroutine isend_cr(sendbuf, sendcount, dest, sendtag, req)
-
-  implicit none
-
-! standard include of the MPI library
-  include 'mpif.h'
-
-  include "constants.h"
-  include "precision.h"
-
-  integer sendcount, dest, sendtag, req
-  real(kind=CUSTOM_REAL), dimension(sendcount) :: sendbuf
-
-  integer ier
-
-  call MPI_ISEND(sendbuf(1),sendcount,CUSTOM_MPI_TYPE,dest,sendtag, &
-                  MPI_COMM_WORLD,req,ier)
-
-  end subroutine isend_cr
-
-!
-!----
-!
-
-  subroutine wait_req(req)
-
-  implicit none
-
-! standard include of the MPI library
-  include 'mpif.h'
-
-  integer :: req
-
-  integer, dimension(MPI_STATUS_SIZE) :: req_mpi_status
-
-  integer :: ier
-
-  call mpi_wait(req,req_mpi_status,ier)
-
-  end subroutine wait_req
-

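Note on the deleted case( 5 ) to case( 7 ) code in assemble_MPI_vector above: it
implements a phased, non-blocking assembly. Each call posts MPI_IRECV/MPI_ISEND
for the chunk faces and returns immediately so the solver can keep working on
inner elements; the next phase only proceeds once MPI_TEST reports that the
pending requests have completed. A minimal standalone sketch of that pattern
(rank count, array sizes and variable names below are illustrative, not taken
from the solver):

  program phased_assembly_sketch

  implicit none

! standard include of the MPI library
  include 'mpif.h'

  integer, parameter :: N = 4
  real :: accel(N), buffer_send(N), buffer_recv(N)
  integer :: myrank, nproc, other, ier
  integer :: request_send, request_recv
  logical :: done_send, done_recv
  integer :: msg_status(MPI_STATUS_SIZE)

  call MPI_INIT(ier)
  call MPI_COMM_RANK(MPI_COMM_WORLD,myrank,ier)
  call MPI_COMM_SIZE(MPI_COMM_WORLD,nproc,ier)
  if (nproc /= 2) call MPI_ABORT(MPI_COMM_WORLD,1,ier)

  other = 1 - myrank
  accel(:) = real(myrank + 1)
  buffer_send(:) = accel(:)

! phase 1: start communications, then "return" to useful work
  call MPI_IRECV(buffer_recv,N,MPI_REAL,other,0,MPI_COMM_WORLD,request_recv,ier)
  call MPI_ISEND(buffer_send,N,MPI_REAL,other,0,MPI_COMM_WORLD,request_send,ier)

! phase 2: poll; in the solver this test happens on a later call,
! after the inner elements have been computed
  done_recv = .false.
  done_send = .false.
  do while (.not. (done_recv .and. done_send))
    call MPI_TEST(request_recv,done_recv,msg_status,ier)
    call MPI_TEST(request_send,done_send,msg_status,ier)
  enddo

! phase 3: assemble the contributions on the shared points
  accel(:) = accel(:) + buffer_recv(:)
  print *,'rank',myrank,'assembled value',accel(1)

  call MPI_FINALIZE(ier)

  end program phased_assembly_sketch

Run on two ranks, each rank prints 3.0, i.e. the sum of both contributions,
which is what the face and corner assembly above does for the real wavefields.
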
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/check_simulation_stability.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -33,7 +33,7 @@
                           SIMULATION_TYPE,OUTPUT_FILES,time_start,DT,t0,NSTEP, &
                           myrank)
 
-  use constants
+  use constants_solver
   use specfem_par,only: GPU_MODE,Mesh_pointer
 
   implicit none

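The use constants -> use constants_solver swap above recurs in most of the
solver hunks in this commit. The commit itself does not show how the new module
is organized; a plausible minimal layering, with purely illustrative names and
values, is a solver module that re-exports the shared constants and adds the
solver-only parameters on top:

  module constants
    implicit none
    integer, parameter :: CUSTOM_REAL = kind(1.0)   ! illustrative: single precision
    integer, parameter :: NDIM = 3
  end module constants

  module constants_solver
! re-export the shared constants and add solver-only parameters
    use constants
    implicit none
    integer, parameter :: N_SLS = 3                 ! illustrative value
  end module constants_solver

  program show_layering
    use constants_solver, only: CUSTOM_REAL,NDIM,N_SLS
    implicit none
    real(kind=CUSTOM_REAL) :: x
    x = 1.0_CUSTOM_REAL
    print *, NDIM, N_SLS, x
  end program show_layering

With a layering of this kind every solver routine keeps a single import while
the mesher can stay on the smaller shared module.
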
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_element.F90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -1331,29 +1331,29 @@
     do i_SLS = imodulo_N_SLS+1,N_SLS,3
       R_xx_val1 = R_xx_loc(i_SLS)
       R_yy_val1 = R_yy_loc(i_SLS)
-      sigma_xx = sigma_xx - R_xx_val1
-      sigma_yy = sigma_yy - R_yy_val1
-      sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1
+      
+      i_SLS1=i_SLS+1
+      R_xx_val2 = R_xx_loc(i_SLS1)
+      R_yy_val2 = R_yy_loc(i_SLS1)
+      
+      i_SLS2 =i_SLS+2
+      R_xx_val3 = R_xx_loc(i_SLS2)
+      R_yy_val3 = R_yy_loc(i_SLS2)
+
+      sigma_xx = sigma_xx - R_xx_val1 - R_xx_val2 - R_xx_val3
+      sigma_yy = sigma_yy - R_yy_val1 - R_yy_val2 - R_yy_val3
+      sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1 &
+                          + R_xx_val2 + R_yy_val2 &
+                          + R_xx_val3 + R_yy_val3
+                          
       sigma_xy = sigma_xy - R_xy_loc(i_SLS)
       sigma_xz = sigma_xz - R_xz_loc(i_SLS)
       sigma_yz = sigma_yz - R_yz_loc(i_SLS)
 
-      i_SLS1=i_SLS+1
-      R_xx_val2 = R_xx_loc(i_SLS1)
-      R_yy_val2 = R_yy_loc(i_SLS1)
-      sigma_xx = sigma_xx - R_xx_val2
-      sigma_yy = sigma_yy - R_yy_val2
-      sigma_zz = sigma_zz + R_xx_val2 + R_yy_val2
       sigma_xy = sigma_xy - R_xy_loc(i_SLS1)
       sigma_xz = sigma_xz - R_xz_loc(i_SLS1)
       sigma_yz = sigma_yz - R_yz_loc(i_SLS1)
 
-      i_SLS2 =i_SLS+2
-      R_xx_val3 = R_xx_loc(i_SLS2)
-      R_yy_val3 = R_yy_loc(i_SLS2)
-      sigma_xx = sigma_xx - R_xx_val3
-      sigma_yy = sigma_yy - R_yy_val3
-      sigma_zz = sigma_zz + R_xx_val3 + R_yy_val3
       sigma_xy = sigma_xy - R_xy_loc(i_SLS2)
       sigma_xz = sigma_xz - R_xz_loc(i_SLS2)
       sigma_yz = sigma_yz - R_yz_loc(i_SLS2)

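The compute_element.F90 hunk above does not change the arithmetic of the
attenuation update; it only regroups the 3-way unrolled loop over the standard
linear solids so that the six R_xx/R_yy loads happen before the grouped stress
updates. A standalone sketch of the regrouped form (array sizes and contents
are made up):

  program sls_unroll_sketch

  implicit none

  integer, parameter :: N_SLS = 6, imodulo_N_SLS = 0   ! illustrative sizes
  real :: R_xx_loc(N_SLS), R_yy_loc(N_SLS)
  real :: sigma_xx, sigma_yy, sigma_zz
  real :: R_xx_val1,R_yy_val1,R_xx_val2,R_yy_val2,R_xx_val3,R_yy_val3
  integer :: i_SLS

  R_xx_loc(:) = 1.0
  R_yy_loc(:) = 2.0
  sigma_xx = 0.0
  sigma_yy = 0.0
  sigma_zz = 0.0

! unrolled by 3: gather the memory-variable loads first,
! then apply them to the stresses in grouped updates
  do i_SLS = imodulo_N_SLS+1,N_SLS,3
    R_xx_val1 = R_xx_loc(i_SLS)
    R_yy_val1 = R_yy_loc(i_SLS)
    R_xx_val2 = R_xx_loc(i_SLS+1)
    R_yy_val2 = R_yy_loc(i_SLS+1)
    R_xx_val3 = R_xx_loc(i_SLS+2)
    R_yy_val3 = R_yy_loc(i_SLS+2)

    sigma_xx = sigma_xx - R_xx_val1 - R_xx_val2 - R_xx_val3
    sigma_yy = sigma_yy - R_yy_val1 - R_yy_val2 - R_yy_val3
    sigma_zz = sigma_zz + R_xx_val1 + R_yy_val1 &
                        + R_xx_val2 + R_yy_val2 &
                        + R_xx_val3 + R_yy_val3
  enddo

  print *, sigma_xx, sigma_yy, sigma_zz

  end program sls_unroll_sketch

Grouping the loads ahead of the updates gives the compiler longer independent
instruction chains to schedule, which appears to be the point of the reorder.
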
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_acoustic.F90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -290,7 +290,7 @@
   subroutine compute_forces_ac_update_veloc(NGLOB,veloc_outer_core,accel_outer_core, &
                                           deltatover2,rmass_outer_core)
 
-  use constants,only: CUSTOM_REAL
+  use constants_solver,only: CUSTOM_REAL
 
 #ifdef _HANDOPT
   use specfem_par,only: imodulo_NGLOB_OUTER_CORE

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -35,13 +35,12 @@
                                         alphaval,betaval,gammaval, &
                                         factor_common,vx,vy,vz,vnspec)
 
-  use constants
+  use constants_solver
 
   use specfem_par,only: &
     hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
     wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
-    minus_gravity_table,density_table,minus_deriv_gravity_table, &
-    npoin2D_max_all_CM_IC,INCLUDE_CENTRAL_CUBE
+    minus_gravity_table,density_table,minus_deriv_gravity_table
 
   use specfem_par_crustmantle,only: &
     xstore => xstore_crust_mantle,ystore => ystore_crust_mantle,zstore => zstore_crust_mantle, &
@@ -61,9 +60,6 @@
     ibool => ibool_crust_mantle, &
     ispec_is_tiso => ispec_is_tiso_crust_mantle, &
     one_minus_sum_beta => one_minus_sum_beta_crust_mantle, &
-    iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
-    npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
-    iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
     phase_ispec_inner => phase_ispec_inner_crust_mantle, &
     nspec_outer => nspec_outer_crust_mantle, &
     nspec_inner => nspec_inner_crust_mantle

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_Dev.F90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -46,12 +46,11 @@
 
 ! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
 
-  use constants
+  use constants_solver
 
   use specfem_par,only: &
     hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
-    minus_gravity_table,density_table,minus_deriv_gravity_table, &
-    npoin2D_max_all_CM_IC,INCLUDE_CENTRAL_CUBE
+    minus_gravity_table,density_table,minus_deriv_gravity_table
 
   use specfem_par_crustmantle,only: &
     xstore => xstore_crust_mantle,ystore => ystore_crust_mantle,zstore => zstore_crust_mantle, &
@@ -71,13 +70,10 @@
     ibool => ibool_crust_mantle, &
     ispec_is_tiso => ispec_is_tiso_crust_mantle, &
     one_minus_sum_beta => one_minus_sum_beta_crust_mantle, &
-    iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
-    npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
-    iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
     phase_ispec_inner => phase_ispec_inner_crust_mantle, &
     nspec_outer => nspec_outer_crust_mantle, &
     nspec_inner => nspec_inner_crust_mantle
-
+    
   implicit none
 
   integer :: NSPEC,NGLOB,NSPEC_ATT

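compute_forces_crust_mantle_Dev.F90 (like the other *_Dev routines touched
below) keeps the note that it is optimized for NGLLX = NGLLY = NGLLZ = 5 using
the Deville et al. (2002) inlined matrix-matrix products. The idea in
isolation, with random data standing in for the derivative matrix and the
field (a sketch, not the solver's actual kernel):

  program deville_sketch

  implicit none

  integer, parameter :: NGLL = 5
  real :: hprime(NGLL,NGLL), u(NGLL,NGLL), du(NGLL,NGLL)
  integer :: i,j

  call random_number(hprime)
  call random_number(u)

! the innermost 5-term sum is written out explicitly so the compiler
! can keep the operands in registers instead of looping
  do j = 1,NGLL
    do i = 1,NGLL
      du(i,j) = hprime(i,1)*u(1,j) + hprime(i,2)*u(2,j) &
              + hprime(i,3)*u(3,j) + hprime(i,4)*u(4,j) &
              + hprime(i,5)*u(5,j)
    enddo
  enddo

! difference to the generic matmul should be at machine-precision level
  print *, maxval(abs(du - matmul(hprime,u)))

  end program deville_sketch
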
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_elastic.F90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -218,26 +218,30 @@
        if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_crust_mantle()
 
        ! add the sources
-       if (SIMULATION_TYPE == 1 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) &
-            call compute_add_sources()
 
        ! add adjoint sources
        if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
           if( nadj_rec_local > 0 ) call compute_add_sources_adjoint()
        endif
 
-       ! add sources for backward/reconstructed wavefield
-       if (SIMULATION_TYPE == 3 .and. NOISE_TOMOGRAPHY == 0 .and. nsources_local > 0) &
+       ! add the sources
+       select case( NOISE_TOMOGRAPHY )
+       case( 0 )
+          ! regular forward or backward simulation, no noise tomography simulation
+          ! adds sources for forward simulation
+          if (SIMULATION_TYPE == 1 .and. nsources_local > 0) &
+            call compute_add_sources()
+          ! add sources for backward/reconstructed wavefield
+          if (SIMULATION_TYPE == 3 .and. nsources_local > 0) &
             call compute_add_sources_backward()
-
-       ! NOISE_TOMOGRAPHY
-       select case( NOISE_TOMOGRAPHY )
+                   
        case( 1 )
           ! the first step of noise tomography is to use |S(\omega)|^2 as a point force source at one of the receivers.
           ! hence, instead of a moment tensor 'sourcearrays', a 'noise_sourcearray' for a point force is needed.
           ! furthermore, the CMTSOLUTION needs to be zero, i.e., no earthquakes.
           ! now this must be manually set in DATA/CMTSOLUTION, by USERS.
           call noise_add_source_master_rec()
+          
        case( 2 )
           ! second step of noise tomography, i.e., read the surface movie saved at every timestep
           ! use the movie to drive the ensemble forward wavefield
@@ -247,12 +251,14 @@
           ! note the ensemble forward sources are generally distributed on the surface of the earth
           ! that's to say, the ensemble forward source is kind of a surface force density, not a body force density
           ! therefore, we must add it here, before applying the inverse of mass matrix
+          
        case( 3 )
           ! third step of noise tomography, i.e., read the surface movie saved at every timestep
           ! use the movie to reconstruct the ensemble forward wavefield
           ! the ensemble adjoint wavefield is done as usual
           ! note instead of "NSTEP-it+1", now we us "it", since reconstruction is a reversal of reversal
           call noise_read_add_surface_movie(NGLOB_CRUST_MANTLE_ADJOINT,b_accel_crust_mantle,it)
+          
        end select
 
 
@@ -495,21 +501,18 @@
   ! couples ocean with crust mantle
   ! (updates acceleration with ocean load approximation)
   if(OCEANS_VAL) then
-     if(.NOT. GPU_MODE) then
-        ! on CPU
-
-        call compute_coupling_ocean(accel_crust_mantle,b_accel_crust_mantle, &
+    if(.NOT. GPU_MODE) then
+      ! on CPU
+      call compute_coupling_ocean(accel_crust_mantle,b_accel_crust_mantle, &
              rmass_crust_mantle,rmass_ocean_load,normal_top_crust_mantle, &
              ibool_crust_mantle,ibelm_top_crust_mantle, &
              updated_dof_ocean_load, &
              SIMULATION_TYPE,NSPEC2D_TOP(IREGION_CRUST_MANTLE))
 
-     else
-        ! on GPU
-
-        call compute_coupling_ocean_cuda(Mesh_pointer)
-
-     endif
+    else
+      ! on GPU
+      call compute_coupling_ocean_cuda(Mesh_pointer)
+    endif
   endif
 
   ! Newmark time scheme:
@@ -539,7 +542,7 @@
   subroutine compute_forces_el_update_accel(NGLOB,veloc_crust_mantle,accel_crust_mantle, &
                                            two_omega_earth,rmass_crust_mantle)
 
-  use constants,only: CUSTOM_REAL,NDIM
+  use constants_solver,only: CUSTOM_REAL,NDIM
 
 #ifdef _HANDOPT
   use specfem_par,only: imodulo_NGLOB_CRUST_MANTLE4
@@ -619,7 +622,7 @@
                                             NGLOB_IC,veloc_inner_core,accel_inner_core, &
                                             deltatover2,two_omega_earth,rmass_inner_core)
 
-  use constants,only: CUSTOM_REAL,NDIM
+  use constants_solver,only: CUSTOM_REAL,NDIM
 
 #ifdef _HANDOPT
   use specfem_par,only: imodulo_NGLOB_CRUST_MANTLE4,imodulo_NGLOB_INNER_CORE

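The compute_forces_elastic.F90 hunk above folds the forward and backward source
additions into the same select case on NOISE_TOMOGRAPHY, so that every source
path is dispatched in one place (only the adjoint sources stay outside).
Reduced to its control flow, with print statements standing in for the real
calls:

  program source_dispatch_sketch

  implicit none

  integer :: SIMULATION_TYPE, NOISE_TOMOGRAPHY, nsources_local

! illustrative settings: a plain forward simulation with local sources
  SIMULATION_TYPE = 1
  NOISE_TOMOGRAPHY = 0
  nsources_local = 1

  select case( NOISE_TOMOGRAPHY )
  case( 0 )
    ! regular earthquake simulation, no noise tomography
    if (SIMULATION_TYPE == 1 .and. nsources_local > 0) print *,'add forward sources'
    if (SIMULATION_TYPE == 3 .and. nsources_local > 0) print *,'add backward/reconstructed sources'
  case( 1 )
    print *,'noise step 1: point force at the master receiver'
  case( 2 )
    print *,'noise step 2: surface movie drives the ensemble forward wavefield'
  case( 3 )
    print *,'noise step 3: surface movie reconstructs the ensemble forward wavefield'
  end select

  end program source_dispatch_sketch
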
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -35,13 +35,12 @@
                                         alphaval,betaval,gammaval,factor_common, &
                                         vx,vy,vz,vnspec)
 
-  use constants
+  use constants_solver
 
   use specfem_par,only: &
     hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
     wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
-    minus_gravity_table,density_table,minus_deriv_gravity_table, &
-    npoin2D_max_all_CM_IC,INCLUDE_CENTRAL_CUBE
+    minus_gravity_table,density_table,minus_deriv_gravity_table
 
   use specfem_par_innercore,only: &
     xstore => xstore_inner_core,ystore => ystore_inner_core,zstore => zstore_inner_core, &
@@ -54,23 +53,10 @@
     c33store => c33store_inner_core,c44store => c44store_inner_core, &
     ibool => ibool_inner_core,idoubling => idoubling_inner_core, &
     one_minus_sum_beta => one_minus_sum_beta_inner_core, &
-    ibool_inner_core, &
-    iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
-    npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
-    iboolfaces_inner_core,iboolcorner_inner_core, &
-    nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
-    npoin2D_cube_from_slices, &
-    ibool_central_cube, &
-    receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC, &
     phase_ispec_inner => phase_ispec_inner_inner_core, &
     nspec_outer => nspec_outer_inner_core, &
     nspec_inner => nspec_inner_inner_core
-
-!  use specfem_par_crustmantle,only: &
-!    iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
-!    npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
-!    iboolfaces_crust_mantle,iboolcorner_crust_mantle
-
+    
   implicit none
 
   integer :: NSPEC,NGLOB,NSPEC_ATT

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_Dev.F90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -46,12 +46,11 @@
 
 ! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
 
-  use constants
+  use constants_solver
 
   use specfem_par,only: &
     hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
-    minus_gravity_table,density_table,minus_deriv_gravity_table, &
-    npoin2D_max_all_CM_IC,INCLUDE_CENTRAL_CUBE
+    minus_gravity_table,density_table,minus_deriv_gravity_table
 
   use specfem_par_innercore,only: &
     xstore => xstore_inner_core,ystore => ystore_inner_core,zstore => zstore_inner_core, &
@@ -64,24 +63,10 @@
     c33store => c33store_inner_core,c44store => c44store_inner_core, &
     ibool => ibool_inner_core,idoubling => idoubling_inner_core, &
     one_minus_sum_beta => one_minus_sum_beta_inner_core, &
-    ibool_inner_core, &
-    iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
-    npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
-    iboolfaces_inner_core,iboolcorner_inner_core, &
-    nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
-    npoin2D_cube_from_slices, &
-    ibool_central_cube, &
-    receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC, &
     phase_ispec_inner => phase_ispec_inner_inner_core, &
     nspec_outer => nspec_outer_inner_core, &
     nspec_inner => nspec_inner_inner_core
 
-
-!  use specfem_par_crustmantle,only: &
-!    iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
-!    npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
-!    iboolfaces_crust_mantle,iboolcorner_crust_mantle
-
   implicit none
 
   integer :: NSPEC,NGLOB,NSPEC_ATT

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -31,14 +31,13 @@
                                       displfluid,accelfluid, &
                                       div_displfluid,phase_is_inner)
 
-  use constants
+  use constants_solver
 
   use specfem_par,only: &
     hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
     wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
     minus_rho_g_over_kappa_fluid,d_ln_density_dr_table, &
-    MOVIE_VOLUME, &
-    npoin2D_max_all_CM_IC
+    MOVIE_VOLUME    
 
   use specfem_par_outercore,only: &
     xstore => xstore_outer_core,ystore => ystore_outer_core,zstore => zstore_outer_core, &
@@ -46,13 +45,10 @@
     etax => etax_outer_core,etay => etay_outer_core,etaz => etaz_outer_core, &
     gammax => gammax_outer_core,gammay => gammay_outer_core,gammaz => gammaz_outer_core, &
     ibool => ibool_outer_core, &
-    iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
-    npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
-    iboolfaces_outer_core,iboolcorner_outer_core, &
     phase_ispec_inner => phase_ispec_inner_outer_core, &
     nspec_outer => nspec_outer_outer_core, &
     nspec_inner => nspec_inner_outer_core
-
+    
   implicit none
 
   integer :: NSPEC,NGLOB

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_Dev.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -33,13 +33,12 @@
 
 ! this routine is optimized for NGLLX = NGLLY = NGLLZ = 5 using the Deville et al. (2002) inlined matrix-matrix products
 
-  use constants
+  use constants_solver
 
   use specfem_par,only: &
     hprime_xx,hprime_xxT,hprimewgll_xx,hprimewgll_xxT,wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
     minus_rho_g_over_kappa_fluid,d_ln_density_dr_table, &
-    MOVIE_VOLUME, &
-    npoin2D_max_all_CM_IC
+    MOVIE_VOLUME
 
   use specfem_par_outercore,only: &
     xstore => xstore_outer_core,ystore => ystore_outer_core,zstore => zstore_outer_core, &
@@ -47,14 +46,10 @@
     etax => etax_outer_core,etay => etay_outer_core,etaz => etaz_outer_core, &
     gammax => gammax_outer_core,gammay => gammay_outer_core,gammaz => gammaz_outer_core, &
     ibool => ibool_outer_core, &
-    iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
-    npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
-    iboolfaces_outer_core,iboolcorner_outer_core, &
     phase_ispec_inner => phase_ispec_inner_outer_core, &
     nspec_outer => nspec_outer_outer_core, &
     nspec_inner => nspec_inner_outer_core
-
-
+    
   implicit none
 
   integer :: NSPEC,NGLOB

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_kernels.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -63,7 +63,7 @@
 
   subroutine compute_kernels_crust_mantle()
 
-  use constants
+  use constants_solver
   use specfem_par,only: deltat
   use specfem_par,only: GPU_MODE,Mesh_pointer
   use specfem_par_crustmantle
@@ -193,7 +193,7 @@
 
   subroutine compute_kernels_outer_core()
 
-  use constants
+  use constants_solver
   use specfem_par,only: deltat,hprime_xx,hprime_yy,hprime_zz,myrank
   use specfem_par,only: GPU_MODE,Mesh_pointer
   use specfem_par_outercore
@@ -443,7 +443,7 @@
 
   subroutine compute_kernels_inner_core()
 
-  use constants
+  use constants_solver
   use specfem_par,only: deltat
   use specfem_par,only: GPU_MODE,Mesh_pointer
   use specfem_par_innercore
@@ -575,7 +575,7 @@
 
   subroutine compute_kernels_hessian()
 
-  use constants
+  use constants_solver
   use specfem_par,only: deltat
   use specfem_par,only: GPU_MODE,Mesh_pointer
   use specfem_par_crustmantle

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_crust_mantle.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -27,7 +27,7 @@
 
   subroutine compute_stacey_crust_mantle()
 
-  use constants
+  use constants_solver
 
   use specfem_par,only: &
     ichunk,SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it, &

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_stacey_outer_core.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -28,7 +28,7 @@
 
   subroutine compute_stacey_outer_core()
 
-  use constants
+  use constants_solver
 
   use specfem_par,only: &
     ichunk,SIMULATION_TYPE,SAVE_FORWARD,NSTEP,it, &

Deleted: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/create_central_cube_buffers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/create_central_cube_buffers.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/create_central_cube_buffers.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -1,628 +0,0 @@
-!=====================================================================
-!
-!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
-!          --------------------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!                        Princeton University, USA
-!             and University of Pau / CNRS / INRIA, France
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-!
-!--- create buffers to assemble with central cube
-!
-
-  subroutine create_central_cube_buffers(myrank,iproc_xi,iproc_eta,ichunk, &
-       NPROC_XI,NPROC_ETA,NCHUNKS,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-       NSPEC2DMAX_XMIN_XMAX_INNER_CORE,NSPEC2DMAX_YMIN_YMAX_INNER_CORE,NSPEC2D_BOTTOM_INNER_CORE, &
-       addressing,ibool_inner_core,idoubling_inner_core, &
-       xstore_inner_core,ystore_inner_core,zstore_inner_core, &
-       nspec2D_xmin_inner_core,nspec2D_xmax_inner_core,nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
-       ibelm_xmin_inner_core,ibelm_xmax_inner_core,ibelm_ymin_inner_core,ibelm_ymax_inner_core,ibelm_bottom_inner_core, &
-       nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices, &
-       receiver_cube_from_slices,sender_from_slices_to_cube,ibool_central_cube, &
-       buffer_slices,buffer_slices2,buffer_all_cube_from_slices)
-
-  implicit none
-
-! standard include of the MPI library
-  include 'mpif.h'
-
-  include "constants.h"
-
-  integer, intent(in) :: myrank,iproc_xi,iproc_eta,ichunk, &
-       NPROC_XI,NPROC_ETA,NCHUNKS,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-       NSPEC2DMAX_XMIN_XMAX_INNER_CORE,NSPEC2DMAX_YMIN_YMAX_INNER_CORE,NSPEC2D_BOTTOM_INNER_CORE
-
-! for addressing of the slices
-  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1), intent(in) :: addressing
-
-! mesh parameters
-  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE), intent(in) :: ibool_inner_core
-
-! local to global mapping
-  integer, dimension(NSPEC_INNER_CORE), intent(in) :: idoubling_inner_core
-
-  real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE), intent(in) :: xstore_inner_core,ystore_inner_core,zstore_inner_core
-
-! boundary parameters locator
-  integer, intent(in) :: nspec2D_xmin_inner_core,nspec2D_xmax_inner_core,nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
-  integer, dimension(NSPEC2DMAX_XMIN_XMAX_INNER_CORE), intent(in) :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
-  integer, dimension(NSPEC2DMAX_YMIN_YMAX_INNER_CORE), intent(in) :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
-  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE), intent(in) :: ibelm_bottom_inner_core
-
-  integer, intent(in) :: nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices
-
-! for matching with central cube in inner core
-  integer, intent(out) :: receiver_cube_from_slices
-
-  integer, dimension(non_zero_nb_msgs_theor_in_cube), intent(out) :: sender_from_slices_to_cube
-  integer, dimension(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices), intent(out) :: ibool_central_cube
-  double precision, dimension(npoin2D_cube_from_slices,NDIM), intent(out) :: buffer_slices,buffer_slices2
-  double precision, dimension(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM), intent(out) :: &
-        buffer_all_cube_from_slices
-
-! local variables below
-  integer i,j,k,ispec,ispec2D,iglob,ier
-  integer sender,receiver,imsg,ipoin,iproc_xi_loop
-
-  double precision x_target,y_target,z_target
-  double precision x_current,y_current,z_current
-
-! MPI status of messages to be received
-  integer msg_status(MPI_STATUS_SIZE)
-
-  integer :: nproc_xi_half_floor,nproc_xi_half_ceil
-
-  if( mod(NPROC_XI,2) /= 0 ) then
-    nproc_xi_half_floor = floor(NPROC_XI/2.d0)
-    nproc_xi_half_ceil = ceiling(NPROC_XI/2.d0)
-  else
-    nproc_xi_half_floor = NPROC_XI/2
-    nproc_xi_half_ceil = NPROC_XI/2
-  endif
-
-!--- processor to send information to in cube from slices
-
-! four vertical sides first
-  if(ichunk == CHUNK_AC) then
-    if (iproc_xi < nproc_xi_half_floor) then
-      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1,iproc_eta)
-    else
-      receiver_cube_from_slices = addressing(CHUNK_AB,0,iproc_eta)
-    endif
-  else if(ichunk == CHUNK_BC) then
-    if (iproc_xi < nproc_xi_half_floor) then
-      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1-iproc_eta,NPROC_ETA-1)
-    else
-      receiver_cube_from_slices = addressing(CHUNK_AB,iproc_eta,NPROC_ETA-1)
-    endif
-  else if(ichunk == CHUNK_AC_ANTIPODE) then
-    if (iproc_xi <= ceiling((NPROC_XI/2.d0)-1)) then
-      receiver_cube_from_slices = addressing(CHUNK_AB,NPROC_XI-1,iproc_eta)
-    else
-      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,0,iproc_eta)
-    endif
-  else if(ichunk == CHUNK_BC_ANTIPODE) then
-    if (iproc_xi < nproc_xi_half_floor) then
-      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,iproc_eta,0)
-    else
-      receiver_cube_from_slices = addressing(CHUNK_AB,NPROC_XI-1-iproc_eta,0)
-    endif
-! bottom of cube, direct correspondence but with inverted xi axis
-  else if(ichunk == CHUNK_AB_ANTIPODE) then
-    receiver_cube_from_slices = addressing(CHUNK_AB,NPROC_XI-1-iproc_xi,iproc_eta)
-  else if(ichunk == CHUNK_AB) then
-    receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1-iproc_xi,iproc_eta)
-  endif
-
-
-!--- list of processors to receive information from in cube
-
-! only for slices in central cube
-  if(ichunk == CHUNK_AB) then
-
-! initialize index of sender
-    imsg = 0
-
-! define sender for xi = xi_min edge
-    if(iproc_xi == 0) then
-      do iproc_xi_loop = nproc_xi_half_floor,NPROC_XI-1
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC,iproc_xi_loop,iproc_eta)
-      enddo
-    endif
-
-! define sender for xi = xi_max edge
-    if(iproc_xi == NPROC_XI-1) then
-      do iproc_xi_loop = 0, floor((NPROC_XI-1)/2.d0)
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC_ANTIPODE,iproc_xi_loop,iproc_eta)
-      enddo
-    endif
-
-! define sender for eta = eta_min edge
-    if(iproc_eta == 0) then
-      do iproc_xi_loop = nproc_xi_half_floor,NPROC_XI-1
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC_ANTIPODE,iproc_xi_loop,NPROC_ETA-1-iproc_xi)
-      enddo
-    endif
-
-! define sender for eta = eta_max edge
-    if(iproc_eta == NPROC_ETA-1) then
-      do iproc_xi_loop = nproc_xi_half_floor,NPROC_XI-1
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC,iproc_xi_loop,iproc_xi)
-      enddo
-    endif
-
-! define sender for bottom edge
-! bottom of cube, direct correspondence but with inverted xi axis
-    imsg = imsg + 1
-    sender_from_slices_to_cube(imsg) = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1-iproc_xi,iproc_eta)
-
-! check that total number of faces found is correct
-    if(imsg /= nb_msgs_theor_in_cube) then
-      print*,'error ',myrank,'nb_msgs_theor_in_cube:',nb_msgs_theor_in_cube,imsg
-      call exit_MPI(myrank,'wrong number of faces found for central cube')
-    endif
-
-  else if(ichunk == CHUNK_AB_ANTIPODE) then
-
-! initialize index of sender
-    imsg = 0
-
-! define sender for xi = xi_min edge
-    if(iproc_xi == 0) then
-      do iproc_xi_loop = nproc_xi_half_ceil,NPROC_XI-1
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC_ANTIPODE,iproc_xi_loop,iproc_eta)
-      enddo
-    endif
-
-! define sender for xi = xi_max edge
-    if(iproc_xi == NPROC_XI-1) then
-      do iproc_xi_loop = 0, floor((NPROC_XI/2.d0)-1.d0)
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC,iproc_xi_loop,iproc_eta)
-      enddo
-    endif
-
-! define sender for eta = eta_min edge
-    if(iproc_eta == 0) then
-      do iproc_xi_loop = 0, floor((NPROC_XI/2.d0)-1.d0)
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC_ANTIPODE,iproc_xi_loop,iproc_xi)
-      enddo
-    endif
-
-! define sender for eta = eta_max edge
-    if(iproc_eta == NPROC_ETA-1) then
-      do iproc_xi_loop = 0, floor((NPROC_XI/2.d0)-1.d0)
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC,iproc_xi_loop,NPROC_ETA-1-iproc_xi)
-      enddo
-    endif
-
-    ! daniel: in case NPROC_XI == 1, the other chunks exchange all bottom points with
-    ! CHUNK_AB **and** CHUNK_AB_ANTIPODE
-    if(NPROC_XI==1) then
-      ! define sender for xi = xi_min edge
-      if(iproc_xi == 0) then
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC_ANTIPODE,0,iproc_eta)
-      endif
-
-      ! define sender for xi = xi_max edge
-      if(iproc_xi == NPROC_XI-1) then
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC,0,iproc_eta)
-      endif
-
-      ! define sender for eta = eta_min edge
-      if(iproc_eta == 0) then
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC_ANTIPODE,0,iproc_xi)
-      endif
-
-      ! define sender for eta = eta_max edge
-      if(iproc_eta == NPROC_ETA-1) then
-        imsg = imsg + 1
-        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC,0,NPROC_ETA-1-iproc_xi)
-      endif
-    endif
-
-! define sender for bottom edge
-! bottom of cube, direct correspondence but with inverted xi axis
-    imsg = imsg + 1
-    sender_from_slices_to_cube(imsg) = addressing(CHUNK_AB,NPROC_XI-1-iproc_xi,iproc_eta)
-
-! check that total number of faces found is correct
-    if(imsg /= nb_msgs_theor_in_cube) then
-      print*,'error ',myrank,'nb_msgs_theor_in_cube:',nb_msgs_theor_in_cube,imsg
-      call exit_MPI(myrank,'wrong number of faces found for central cube')
-    endif
-
-  else
-
-! dummy value in slices
-    sender_from_slices_to_cube(1) = -1
-
-  endif
-
-
-! on chunk AB & AB ANTIPODE, receive all (except bottom) the messages from slices
-  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-    do imsg = 1,nb_msgs_theor_in_cube-1
-
-    ! receive buffers from slices
-    sender = sender_from_slices_to_cube(imsg)
-    call MPI_RECV(buffer_slices, &
-              NDIM*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
-              itag,MPI_COMM_WORLD,msg_status,ier)
-
-    ! copy buffer in 2D array for each slice
-    buffer_all_cube_from_slices(imsg,:,:) = buffer_slices(:,:)
-
-    enddo
-  endif
-
-  ! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
-  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE ) then
-
-    ! for bottom elements in contact with central cube from the slices side
-    ipoin = 0
-    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
-
-      ispec = ibelm_bottom_inner_core(ispec2D)
-
-      ! only for DOFs exactly on surface of central cube (bottom of these elements)
-      k = 1
-      do j = 1,NGLLY
-        do i = 1,NGLLX
-          ipoin = ipoin + 1
-          iglob = ibool_inner_core(i,j,k,ispec)
-          buffer_slices(ipoin,1) = dble(xstore_inner_core(iglob))
-          buffer_slices(ipoin,2) = dble(ystore_inner_core(iglob))
-          buffer_slices(ipoin,3) = dble(zstore_inner_core(iglob))
-        enddo
-      enddo
-    enddo
-
-    ! send buffer to central cube
-    receiver = receiver_cube_from_slices
-    call MPI_SEND(buffer_slices,NDIM*npoin2D_cube_from_slices, &
-              MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
-
-    ! daniel: in case NPROC_XI == 1, the other chunks exchange all bottom points with
-    ! CHUNK_AB **and** CHUNK_AB_ANTIPODE
-    if(NPROC_XI==1) then
-      call MPI_SEND(buffer_slices,NDIM*npoin2D_cube_from_slices, &
-                   MPI_DOUBLE_PRECISION, &
-                   addressing(CHUNK_AB_ANTIPODE,0,iproc_eta), &
-                   itag,MPI_COMM_WORLD,ier)
-    endif
-
-  endif  ! end sending info to central cube
-
-
-  ! exchange of their bottom faces between chunks AB and AB_ANTIPODE
-  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-    ipoin = 0
-    do ispec = NSPEC_INNER_CORE, 1, -1
-      if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
-        k = 1
-        do j = 1,NGLLY
-          do i = 1,NGLLX
-            ipoin = ipoin + 1
-            iglob = ibool_inner_core(i,j,k,ispec)
-            buffer_slices(ipoin,1) = dble(xstore_inner_core(iglob))
-            buffer_slices(ipoin,2) = dble(ystore_inner_core(iglob))
-            buffer_slices(ipoin,3) = dble(zstore_inner_core(iglob))
-          enddo
-        enddo
-      endif
-    enddo
-    if (ipoin /= npoin2D_cube_from_slices) then
-      print*,'error',myrank,'bottom points:',npoin2D_cube_from_slices,ipoin
-      call exit_MPI(myrank,'wrong number of points found for bottom CC AB or !AB')
-    endif
-
-    sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
-
-    call MPI_SENDRECV(buffer_slices,NDIM*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
-        itag,buffer_slices2,NDIM*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
-        itag,MPI_COMM_WORLD,msg_status,ier)
-
-    buffer_all_cube_from_slices(nb_msgs_theor_in_cube,:,:) = buffer_slices2(:,:)
-
-  endif
-
-  !--- now we need to find the points received and create indirect addressing
-  ibool_central_cube(:,:) = -1
-
-  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-
-   do imsg = 1,nb_msgs_theor_in_cube
-
-    do ipoin = 1,npoin2D_cube_from_slices
-
-      x_target = buffer_all_cube_from_slices(imsg,ipoin,1)
-      y_target = buffer_all_cube_from_slices(imsg,ipoin,2)
-      z_target = buffer_all_cube_from_slices(imsg,ipoin,3)
-
-      ! x = x_min
-      do ispec2D = 1,nspec2D_xmin_inner_core
-        ispec = ibelm_xmin_inner_core(ispec2D)
-        ! do not loop on elements outside of the central cube
-        if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
-          idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
-          idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
-        !daniel: debug
-        if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) stop 'error xmin ibelm'
-        i = 1
-        do k = 1,NGLLZ
-          do j = 1,NGLLY
-           iglob = ibool_inner_core(i,j,k,ispec)
-           x_current = dble(xstore_inner_core(iglob))
-           y_current = dble(ystore_inner_core(iglob))
-           z_current = dble(zstore_inner_core(iglob))
-           ! look for matching point
-           if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
-             ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
-             goto 100
-           endif
-          enddo
-        enddo
-      enddo
-
-      ! x = x_max
-      do ispec2D = 1,nspec2D_xmax_inner_core
-        ispec = ibelm_xmax_inner_core(ispec2D)
-        ! do not loop on elements outside of the central cube
-        if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
-            idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
-            idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
-        !daniel: debug
-        if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) stop 'error xmax ibelm'
-        i = NGLLX
-        do k = 1,NGLLZ
-          do j = 1,NGLLY
-            iglob = ibool_inner_core(i,j,k,ispec)
-            x_current = dble(xstore_inner_core(iglob))
-            y_current = dble(ystore_inner_core(iglob))
-            z_current = dble(zstore_inner_core(iglob))
-            ! look for matching point
-            if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
-              ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
-              goto 100
-            endif
-          enddo
-        enddo
-      enddo
-
-      ! y = y_min
-      do ispec2D = 1,nspec2D_ymin_inner_core
-        ispec = ibelm_ymin_inner_core(ispec2D)
-        ! do not loop on elements outside of the central cube
-        if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
-            idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
-            idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
-        !daniel: debug
-        if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) stop 'error ymin ibelm'
-        j = 1
-        do k = 1,NGLLZ
-          do i = 1,NGLLX
-            iglob = ibool_inner_core(i,j,k,ispec)
-            x_current = dble(xstore_inner_core(iglob))
-            y_current = dble(ystore_inner_core(iglob))
-            z_current = dble(zstore_inner_core(iglob))
-            ! look for matching point
-            if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
-              ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
-              goto 100
-            endif
-          enddo
-        enddo
-      enddo
-
-      ! y = y_max
-      do ispec2D = 1,nspec2D_ymax_inner_core
-        ispec = ibelm_ymax_inner_core(ispec2D)
-        ! do not loop on elements outside of the central cube
-        if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
-            idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
-            idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
-        !daniel: debug
-        if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) stop 'error ymax ibelm'
-        j = NGLLY
-        do k = 1,NGLLZ
-          do i = 1,NGLLX
-            iglob = ibool_inner_core(i,j,k,ispec)
-            x_current = dble(xstore_inner_core(iglob))
-            y_current = dble(ystore_inner_core(iglob))
-            z_current = dble(zstore_inner_core(iglob))
-            ! look for matching point
-            if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
-              ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
-              goto 100
-            endif
-          enddo
-        enddo
-      enddo
-
-      ! bottom of cube
-      do ispec = 1,NSPEC_INNER_CORE
-        ! loop on elements at the bottom of the cube only
-        if(idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE) cycle
-        k = 1
-        do j = 1,NGLLY
-          do i = 1,NGLLX
-            iglob = ibool_inner_core(i,j,k,ispec)
-            x_current = dble(xstore_inner_core(iglob))
-            y_current = dble(ystore_inner_core(iglob))
-            z_current = dble(zstore_inner_core(iglob))
-            ! look for matching point
-            if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
-              ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
-              goto 100
-            endif
-          enddo
-        enddo
-      enddo
-
-      ! point not found so far
-      if(NPROC_XI==1) then
-        ! ignores point
-        ibool_central_cube(imsg,ipoin) = 0
-      else
-        ! check that a matching point is found in all cases
-        call exit_MPI(myrank,'point never found in central cube')
-      endif
-
- 100  continue
-
-    enddo ! ipoin
-
-    ! daniel: check ibool array
-    if(NPROC_XI==1) then
-      if( minval(ibool_central_cube(imsg,:)) < 0 ) call exit_mpi(myrank,'error ibool_central_cube point not found')
-
-      ! removes points on bottom surface in antipode chunk for other chunks than its AB sharing chunk
-      ! (to avoid adding the same point twice from other chunks)
-      if( ichunk == CHUNK_AB_ANTIPODE .and. imsg < nb_msgs_theor_in_cube ) then
-        do ipoin = 1,npoin2D_cube_from_slices
-          x_target = buffer_all_cube_from_slices(imsg,ipoin,1)
-          y_target = buffer_all_cube_from_slices(imsg,ipoin,2)
-          z_target = buffer_all_cube_from_slices(imsg,ipoin,3)
-
-          ! bottom of cube
-          do ispec = 1,NSPEC_INNER_CORE
-            ! loop on elements at the bottom of the cube only
-            if(idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE) cycle
-            k = 1
-            do j = 1,NGLLY
-              do i = 1,NGLLX
-                iglob = ibool_inner_core(i,j,k,ispec)
-                x_current = dble(xstore_inner_core(iglob))
-                y_current = dble(ystore_inner_core(iglob))
-                z_current = dble(zstore_inner_core(iglob))
-                ! look for matching point
-                if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
-                  ibool_central_cube(imsg,ipoin) = 0
-                  goto 200
-                endif
-              enddo
-            enddo
-          enddo
-
- 200      continue
-
-        enddo ! ipoin
-      endif
-
-    endif ! NPROC_XI==1
-
-   enddo ! imsg
-  endif
-
-  end subroutine create_central_cube_buffers
-
-!
-!----------------------------------
-!
-
-  subroutine comp_central_cube_buffer_size(iproc_xi,iproc_eta,ichunk,NPROC_XI,NPROC_ETA,NSPEC2D_BOTTOM_INNER_CORE, &
-                nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
-
-!--- compute number of messages to expect in cube as well as their size
-!--- take into account vertical sides and bottom side
-
-  implicit none
-
-  include "constants.h"
-
-  integer, intent(in) :: iproc_xi,iproc_eta,ichunk,NPROC_XI,NPROC_ETA,NSPEC2D_BOTTOM_INNER_CORE
-
-  integer, intent(out) :: nb_msgs_theor_in_cube,npoin2D_cube_from_slices
-
-!daniel: debug
-  integer :: nproc_xi_half_floor,nproc_xi_half_ceil
-
-  if( mod(NPROC_XI,2) /= 0 ) then
-    nproc_xi_half_floor = floor(NPROC_XI/2.d0)
-    nproc_xi_half_ceil = ceiling(NPROC_XI/2.d0)
-  else
-    nproc_xi_half_floor = NPROC_XI/2
-    nproc_xi_half_ceil = NPROC_XI/2
-  endif
-
-! only for slices in central cube
-  if(ichunk == CHUNK_AB) then
-    if(NPROC_XI == 1) then
-      ! five sides if only one processor in cube
-      nb_msgs_theor_in_cube = 5
-    else
-      ! case of a corner
-      if((iproc_xi == 0 .or. iproc_xi == NPROC_XI-1).and. &
-         (iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1)) then
-        ! slices on both "vertical" faces plus one slice at the bottom
-        nb_msgs_theor_in_cube = 2*(nproc_xi_half_ceil) + 1
-      ! case of an edge
-      else if(iproc_xi == 0 .or. iproc_xi == NPROC_XI-1 .or. &
-              iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1) then
-        ! slices on the "vertical" face plus one slice at the bottom
-        nb_msgs_theor_in_cube = nproc_xi_half_ceil + 1
-      else
-        ! bottom element only
-        nb_msgs_theor_in_cube = 1
-      endif
-    endif
-  else if(ichunk == CHUNK_AB_ANTIPODE) then
-    if(NPROC_XI == 1) then
-      ! five sides if only one processor in cube
-      nb_msgs_theor_in_cube = 5
-    else
-      ! case of a corner
-      if((iproc_xi == 0 .or. iproc_xi == NPROC_XI-1).and. &
-         (iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1)) then
-        ! slices on both "vertical" faces plus one slice at the bottom
-        nb_msgs_theor_in_cube = 2*(nproc_xi_half_floor) + 1
-      ! case of an edge
-      else if(iproc_xi == 0 .or. iproc_xi == NPROC_XI-1 .or. &
-              iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1) then
-        ! slices on the "vertical" face plus one slice at the bottom
-        nb_msgs_theor_in_cube = nproc_xi_half_floor + 1
-      else
-        ! bottom element only
-        nb_msgs_theor_in_cube = 1
-      endif
-    endif
-  else
-    ! not in chunk AB
-    nb_msgs_theor_in_cube = 0
-  endif
-
-  ! number of points to send or receive (bottom of slices)
-  npoin2D_cube_from_slices = NSPEC2D_BOTTOM_INNER_CORE * NGLLX * NGLLY
-
-  end subroutine comp_central_cube_buffer_size
-
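
The removed comp_central_cube_buffer_size above counts how many messages a central-cube slice should expect: roughly half of the NPROC_XI slices along each "vertical" cube face it touches (the ceiling of NPROC_XI/2 in CHUNK_AB, the floor in CHUNK_AB_ANTIPODE), plus one message for the slice's own bottom face, and five messages when a single processor covers the whole cube. A minimal standalone sketch of that count, assuming a hypothetical odd layout NPROC_XI = 5 (values are illustrative only, not taken from any Par_file):

  program central_cube_msgs
    implicit none
    ! hypothetical odd slice layout, so that floor and ceiling of NPROC_XI/2 differ
    integer, parameter :: NPROC_XI = 5
    integer :: nhalf_floor,nhalf_ceil
    nhalf_floor = floor(NPROC_XI/2.d0)      ! = 2
    nhalf_ceil  = ceiling(NPROC_XI/2.d0)    ! = 3
    ! corner slice: two "vertical" cube faces plus its own bottom face
    print *,'corner of CHUNK_AB          : ',2*nhalf_ceil + 1    ! 7 messages
    print *,'corner of CHUNK_AB_ANTIPODE : ',2*nhalf_floor + 1   ! 5 messages
    ! edge slice: one "vertical" cube face plus the bottom face
    print *,'edge of CHUNK_AB            : ',nhalf_ceil + 1      ! 4 messages
    ! interior slice: bottom face only
    print *,'interior slice              : ',1                   ! 1 message
  end program central_cube_msgs

The buffer size itself does not depend on that count: every message carries NSPEC2D_BOTTOM_INNER_CORE * NGLLX * NGLLY points, i.e. the GLL points of a slice's bottom surface.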

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/finalize_simulation.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -139,21 +139,41 @@
   endif
 
   ! frees dynamically allocated memory
+
   ! mpi buffers
-  deallocate(buffer_send_faces, &
-            buffer_received_faces)
-  deallocate(b_buffer_send_faces, &
-            b_buffer_received_faces)
+  deallocate(buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
+            request_send_vector_crust_mantle,request_recv_vector_crust_mantle)
+  deallocate(buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
+            request_send_scalar_outer_core,request_recv_scalar_outer_core)
+  deallocate(buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
+            request_send_vector_inner_core,request_recv_vector_inner_core)
+                    
+  if( SIMULATION_TYPE == 3 ) then
+    deallocate(b_buffer_send_vector_crust_mantle,b_buffer_recv_vector_crust_mantle, &
+              b_request_send_vector_crust_mantle,b_request_recv_vector_crust_mantle)
+    deallocate(b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
+              b_request_send_scalar_outer_core,b_request_recv_scalar_outer_core)
+    deallocate(b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
+              b_request_send_vector_inner_core,b_request_recv_vector_inner_core)
+  endif
 
-  ! central cube buffers
-  deallocate(sender_from_slices_to_cube, &
-            buffer_all_cube_from_slices, &
-            b_buffer_all_cube_from_slices, &
-            buffer_slices, &
-            b_buffer_slices, &
-            buffer_slices2, &
-            ibool_central_cube)
+  deallocate(my_neighbours_crust_mantle,nibool_interfaces_crust_mantle)
+  deallocate(ibool_interfaces_crust_mantle)
+  deallocate(my_neighbours_outer_core,nibool_interfaces_outer_core)
+  deallocate(ibool_interfaces_outer_core)
+  deallocate(my_neighbours_inner_core,nibool_interfaces_inner_core)
+  deallocate(ibool_interfaces_inner_core)
 
+  ! inner/outer elements
+  deallocate(phase_ispec_inner_crust_mantle)
+  deallocate(phase_ispec_inner_outer_core)
+  deallocate(phase_ispec_inner_inner_core)
+
+  ! coloring
+  deallocate(num_elem_colors_crust_mantle)
+  deallocate(num_elem_colors_outer_core)
+  deallocate(num_elem_colors_inner_core)
+  
   ! sources
   deallocate(islice_selected_source, &
             ispec_selected_source, &

Deleted: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -1,187 +0,0 @@
-!=====================================================================
-!
-!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
-!          --------------------------------------------------
-!
-!          Main authors: Dimitri Komatitsch and Jeroen Tromp
-!                        Princeton University, USA
-!             and University of Pau / CNRS / INRIA, France
-! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-!                            April 2011
-!
-! This program is free software; you can redistribute it and/or modify
-! it under the terms of the GNU General Public License as published by
-! the Free Software Foundation; either version 2 of the License, or
-! (at your option) any later version.
-!
-! This program is distributed in the hope that it will be useful,
-! but WITHOUT ANY WARRANTY; without even the implied warranty of
-! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-! GNU General Public License for more details.
-!
-! You should have received a copy of the GNU General Public License along
-! with this program; if not, write to the Free Software Foundation, Inc.,
-! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-!
-!=====================================================================
-
-! fix the non blocking arrays to assemble the slices inside each chunk: elements
-! in contact with the MPI faces by an edge or a corner only but not
-! a full face are missing, therefore let us add them
-  subroutine fix_non_blocking_slices(is_on_a_slice_edge,iboolright_xi, &
-         iboolleft_xi,iboolright_eta,iboolleft_eta, &
-         npoin2D_xi,npoin2D_eta,ibool, &
-         mask_ibool,nspec,nglob,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX)
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: nspec,nglob,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
-
-  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi,npoin2D_eta
-
-  logical, dimension(nspec) :: is_on_a_slice_edge
-
-  integer, dimension(NGLOB2DMAX_XMIN_XMAX) :: iboolleft_xi,iboolright_xi
-  integer, dimension(NGLOB2DMAX_YMIN_YMAX) :: iboolleft_eta,iboolright_eta
-
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-
-  logical, dimension(nglob) :: mask_ibool
-
-  integer :: ipoin,ispec,i,j,k
-
-! clean the mask
-  mask_ibool(:) = .false.
-
-! mark all the points that are in the MPI buffers to assemble inside each chunk
-  do ipoin = 1,npoin2D_xi(1)
-    mask_ibool(iboolleft_xi(ipoin)) = .true.
-  enddo
-
-  do ipoin = 1,npoin2D_eta(1)
-    mask_ibool(iboolleft_eta(ipoin)) = .true.
-  enddo
-
-  do ipoin = 1,npoin2D_xi(2)
-    mask_ibool(iboolright_xi(ipoin)) = .true.
-  enddo
-
-  do ipoin = 1,npoin2D_eta(2)
-    mask_ibool(iboolright_eta(ipoin)) = .true.
-  enddo
-
-! now label all the elements that have at least one corner belonging
-! to any of these buffers as elements that must contribute to the
-! first step of the calculations (performed on the edges before starting
-! the non blocking communications); there is no need to examine the inside
-! of the elements, checking their eight corners is sufficient
-  do ispec = 1,nspec
-    do k = 1,NGLLZ,NGLLZ-1
-      do j  = 1,NGLLY,NGLLY-1
-        do i = 1,NGLLX,NGLLX-1
-          if(mask_ibool(ibool(i,j,k,ispec))) then
-            is_on_a_slice_edge(ispec) = .true.
-            goto 888
-          endif
-        enddo
-      enddo
-    enddo
-  888 continue
-  enddo
-
-  end subroutine fix_non_blocking_slices
-
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-
-! fix the non blocking arrays to assemble the central cube: elements
-! in contact with the MPI faces by an edge or a corner only but not
-! a full face are missing, therefore let us add them
-  subroutine fix_non_blocking_central_cube(is_on_a_slice_edge, &
-         ibool,nspec,nglob,nb_msgs_theor_in_cube,ibelm_bottom_inner_core, &
-         idoubling_inner_core,npoin2D_cube_from_slices, &
-         ibool_central_cube,NSPEC2D_BOTTOM_INNER_CORE, &
-         ichunk,NPROC_XI)
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: nspec,nglob,nb_msgs_theor_in_cube,NSPEC2D_BOTTOM_INNER_CORE
-  integer :: ichunk,npoin2D_cube_from_slices,NPROC_XI
-
-  logical, dimension(nspec) :: is_on_a_slice_edge
-
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-
-  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices) :: ibool_central_cube
-
-  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
-
-! local to global mapping
-  integer, dimension(nspec) :: idoubling_inner_core
-
-! this mask is declared as integer in the calling program because it is used elsewhere
-! to store integers, and it is reused here as a logical to save memory
-  logical, dimension(nglob) :: mask_ibool
-
-  integer :: ipoin,ispec,i,j,k,imsg,ispec2D
-
-  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
-    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
-      ispec = ibelm_bottom_inner_core(ispec2D)
-      is_on_a_slice_edge(ispec) = .true.
-    enddo
-  endif
-
-  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-    do ispec = 1,nspec
-      if(idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE .or. &
-         idoubling_inner_core(ispec) == IFLAG_TOP_CENTRAL_CUBE) &
-        is_on_a_slice_edge(ispec) = .true.
-    enddo
-  endif
-
-  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
-
-    ! clean the mask
-    mask_ibool(:) = .false.
-
-    do imsg = 1,nb_msgs_theor_in_cube
-      do ipoin = 1,npoin2D_cube_from_slices
-        if(NPROC_XI==1) then
-          if(ibool_central_cube(imsg,ipoin) > 0 ) then
-            mask_ibool(ibool_central_cube(imsg,ipoin)) = .true.
-          endif
-        else
-          mask_ibool(ibool_central_cube(imsg,ipoin)) = .true.
-        endif
-      enddo
-    enddo
-
-    ! now label all the elements that have at least one corner belonging
-    ! to any of these buffers as elements that must contribute to the
-    ! first step of the calculations (performed on the edges before starting
-    ! the non blocking communications); there is no need to examine the inside
-    ! of the elements, checking their eight corners is sufficient
-    do ispec = 1,nspec
-      do k = 1,NGLLZ,NGLLZ-1
-        do j  = 1,NGLLY,NGLLY-1
-          do i = 1,NGLLX,NGLLX-1
-            if(mask_ibool(ibool(i,j,k,ispec))) then
-              is_on_a_slice_edge(ispec) = .true.
-              goto 888
-            endif
-          enddo
-        enddo
-      enddo
-    888 continue
-    enddo
-
-  endif
-
-  end subroutine fix_non_blocking_central_cube
-
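
Both routines removed above rely on the same corner-only test: once the MPI-buffer points are flagged in mask_ibool, an element is marked as being on a slice edge as soon as any of its eight corners carries the flag, and the nested loops with stride NGLL-1 visit exactly those corners. A minimal sketch of that loop idiom, assuming the usual NGLLX = NGLLY = NGLLZ = 5:

  program corner_loop_demo
    implicit none
    ! assumed GLL orders; the stride NGLL-1 visits only the first and last index
    integer, parameter :: NGLLX = 5, NGLLY = 5, NGLLZ = 5
    integer :: i,j,k,ncorners
    ncorners = 0
    do k = 1,NGLLZ,NGLLZ-1
      do j = 1,NGLLY,NGLLY-1
        do i = 1,NGLLX,NGLLX-1
          ! in the removed routines, mask_ibool(ibool(i,j,k,ispec)) is tested here
          ncorners = ncorners + 1
        enddo
      enddo
    enddo
    print *,'GLL points visited per element: ',ncorners   ! prints 8: the corners only
  end program corner_loop_demo

As the original comment notes, there is no need to examine interior GLL points: contact with a buffer always shows up at one of the eight element corners.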

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/initialize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/initialize_simulation.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/initialize_simulation.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -102,8 +102,7 @@
       call exit_MPI(myrank,'an error occurred while reading the parameter file')
     endif
 
-    ! GPU_MODE is in par_file
-    ! parameter is optional, may not be in the Par_file
+    ! GPU_MODE: parameter is optional, may not be in the Par_file
     call read_gpu_mode(GPU_MODE)
 
   endif
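
read_gpu_mode itself is not shown in this diff; as a rough sketch only, an optional flag like this can be defaulted to .false. and switched on only when a matching entry is found, so an absent GPU_MODE line in the Par_file is not treated as an error (the file name and matching rule below are hypothetical, not the actual routine):

  program optional_flag_demo
    implicit none
    ! hypothetical parameter-file scan: the flag keeps its default when the file
    ! or the entry is missing, so an absent GPU_MODE line is not an error
    logical :: GPU_MODE
    character(len=150) :: line
    integer :: ier
    GPU_MODE = .false.
    open(unit=12,file='Par_file',status='old',action='read',iostat=ier)
    if( ier == 0 ) then
      do
        read(12,'(a)',iostat=ier) line
        if( ier /= 0 ) exit
        if( index(line,'GPU_MODE') > 0 .and. index(line,'.true.') > 0 ) GPU_MODE = .true.
      enddo
      close(12)
    endif
    print *,'GPU_MODE = ',GPU_MODE
  end program optional_flag_demo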

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/iterate_time.F90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -110,10 +110,7 @@
     endif
 
     ! outputs movie files
-!daniel: debug
-!    if( MOVIE_SURFACE .or. MOVIE_VOLUME ) then
-      call write_movie_output()
-!    endif
+    call write_movie_output()
 
     ! first step of noise tomography, i.e., save a surface movie at every time step
     ! modified from the subroutine 'write_movie_surface'

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_receivers.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -34,7 +34,7 @@
                              yr,jda,ho,mi,sec, &
                              theta_source,phi_source,NCHUNKS,ELLIPTICITY)
 
-  use constants
+  use constants_solver
   use specfem_par,only: &
     myrank,DT,NSTEP, &
     xigll,yigll,zigll, &
@@ -106,14 +106,15 @@
   double precision etax,etay,etaz
   double precision gammax,gammay,gammaz
 
-! timer MPI
+  ! timer MPI
   double precision time_start,tCPU
 
-! use dynamic allocation
+  ! use dynamic allocation
   double precision, dimension(:), allocatable :: final_distance
   double precision, dimension(:,:), allocatable :: final_distance_all
+
   double precision distmin,final_distance_max
-
+  
 ! receiver information
 ! timing information for the stations
 ! station information for writing the seismograms
@@ -130,12 +131,19 @@
   integer, allocatable, dimension(:,:) :: ispec_selected_rec_all
   double precision, allocatable, dimension(:,:) :: xi_receiver_all,eta_receiver_all,gamma_receiver_all
 
+  double precision x_target_rec,y_target_rec,z_target_rec
+
   double precision typical_size
   logical located_target
 
   character(len=150) OUTPUT_FILES
   character(len=2) bic
 
+  integer,parameter :: MIDX = (NGLLX+1)/2
+  integer,parameter :: MIDY = (NGLLY+1)/2
+  integer,parameter :: MIDZ = (NGLLZ+1)/2
+
+
   ! get MPI starting time
   time_start = MPI_WTIME()
 
@@ -311,10 +319,14 @@
     r0 = r0 - stbur(irec)/R_EARTH
 
     ! compute the Cartesian position of the receiver
-    x_target(irec) = r0*sin(theta)*cos(phi)
-    y_target(irec) = r0*sin(theta)*sin(phi)
-    z_target(irec) = r0*cos(theta)
+    x_target_rec = r0*sin(theta)*cos(phi)
+    y_target_rec = r0*sin(theta)*sin(phi)
+    z_target_rec = r0*cos(theta)
 
+    x_target(irec) = x_target_rec
+    y_target(irec) = y_target_rec
+    z_target(irec) = z_target_rec
+
     ! would write out desired target locations of receivers
     !if (myrank == 0) write(IOVTK,*) sngl(x_target(irec)), sngl(y_target(irec)), sngl(z_target(irec))
 
@@ -324,10 +336,10 @@
     do ispec=1,nspec
 
       ! exclude elements that are too far from target
-      iglob = ibool(1,1,1,ispec)
-      dist = dsqrt((x_target(irec) - dble(xstore(iglob)))**2 &
-                 + (y_target(irec) - dble(ystore(iglob)))**2 &
-                 + (z_target(irec) - dble(zstore(iglob)))**2)
+      iglob = ibool(MIDX,MIDY,MIDZ,ispec)
+      dist = dsqrt((x_target_rec - dble(xstore(iglob)))**2 &
+                 + (y_target_rec - dble(ystore(iglob)))**2 &
+                 + (z_target_rec - dble(zstore(iglob)))**2)
       if(USE_DISTANCE_CRITERION .and. dist > typical_size) cycle
 
       ! loop only on points inside the element
@@ -337,9 +349,9 @@
           do i=2,NGLLX-1
 
             iglob = ibool(i,j,k,ispec)
-            dist = dsqrt((x_target(irec)-dble(xstore(iglob)))**2 &
-                        +(y_target(irec)-dble(ystore(iglob)))**2 &
-                        +(z_target(irec)-dble(zstore(iglob)))**2)
+            dist = dsqrt((x_target_rec - dble(xstore(iglob)))**2 &
+                        +(y_target_rec - dble(ystore(iglob)))**2 &
+                        +(z_target_rec - dble(zstore(iglob)))**2)
 
             !  keep this point if it is closer to the receiver
             if(dist < distmin) then
@@ -362,9 +374,9 @@
     ! therefore use first element only for fictitious iterative search
     if(.not. located_target) then
       ispec_selected_rec(irec)=1
-      ix_initial_guess(irec) = 2
-      iy_initial_guess(irec) = 2
-      iz_initial_guess(irec) = 2
+      ix_initial_guess(irec) = MIDX
+      iy_initial_guess(irec) = MIDY
+      iz_initial_guess(irec) = MIDZ
     endif
 
   ! end of loop on all the stations
@@ -424,24 +436,18 @@
 ! find the best (xi,eta) for each receiver
 ! ****************************************
 
-! loop on all the receivers to iterate in that slice
+  ! loop on all the receivers to iterate in that slice
   do irec = 1,nrec
 
     ispec_iterate = ispec_selected_rec(irec)
 
-    ! use initial guess in xi and eta
-    xi = xigll(ix_initial_guess(irec))
-    eta = yigll(iy_initial_guess(irec))
-    gamma = zigll(iz_initial_guess(irec))
-
     ! define coordinates of the control points of the element
-
     do ia=1,NGNOD
 
       if(iaddx(ia) == 0) then
         iax = 1
       else if(iaddx(ia) == 1) then
-        iax = (NGLLX+1)/2
+        iax = MIDX
       else if(iaddx(ia) == 2) then
         iax = NGLLX
       else
@@ -451,7 +457,7 @@
       if(iaddy(ia) == 0) then
         iay = 1
       else if(iaddy(ia) == 1) then
-        iay = (NGLLY+1)/2
+        iay = MIDY
       else if(iaddy(ia) == 2) then
         iay = NGLLY
       else
@@ -461,7 +467,7 @@
       if(iaddr(ia) == 0) then
         iaz = 1
       else if(iaddr(ia) == 1) then
-        iaz = (NGLLZ+1)/2
+        iaz = MIDZ
       else if(iaddr(ia) == 2) then
         iaz = NGLLZ
       else
@@ -475,6 +481,15 @@
 
     enddo
 
+    ! use initial guess in xi and eta
+    xi = xigll(ix_initial_guess(irec))
+    eta = yigll(iy_initial_guess(irec))
+    gamma = zigll(iz_initial_guess(irec))
+
+    x_target_rec = x_target(irec)
+    y_target_rec = y_target(irec)
+    z_target_rec = z_target(irec)
+
     ! iterate to solve the non linear system
     do iter_loop = 1,NUM_ITER
 
@@ -483,12 +498,12 @@
 
       ! recompute jacobian for the new point
       call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
-           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+                             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
 
       ! compute distance to target location
-      dx = - (x - x_target(irec))
-      dy = - (y - y_target(irec))
-      dz = - (z - z_target(irec))
+      dx = - (x - x_target_rec)
+      dy = - (y - y_target_rec)
+      dz = - (z - z_target_rec)
 
       ! compute increments
       ! gamma does not change since we know the receiver is exactly on the surface
@@ -521,7 +536,7 @@
 
     ! compute final coordinates of point found
     call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
-         xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+                           xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
 
     ! store xi,eta and x,y,z of point found
     xi_receiver(irec) = xi
@@ -531,13 +546,14 @@
     y_found(irec) = y
     z_found(irec) = z
 
-! compute final distance between asked and found (converted to km)
-    final_distance(irec) = dsqrt((x_target(irec)-x_found(irec))**2 + &
-        (y_target(irec)-y_found(irec))**2 + (z_target(irec)-z_found(irec))**2)*R_EARTH/1000.d0
+    ! compute final distance between asked and found (converted to km)
+    final_distance(irec) = dsqrt((x_target_rec-x)**2 + &
+                                 (y_target_rec-y)**2 + &
+                                 (z_target_rec-z)**2)*R_EARTH/1000.d0
 
   enddo
 
-! for MPI version, gather information from all the nodes
+  ! for MPI version, gather information from all the nodes
   ispec_selected_rec_all(:,:) = -1
   call MPI_GATHER(ispec_selected_rec,nrec,MPI_INTEGER,ispec_selected_rec_all,nrec,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
 
@@ -556,7 +572,7 @@
   call MPI_GATHER(z_found,nrec,MPI_DOUBLE_PRECISION,z_found_all,nrec, &
                   MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
 
-! this is executed by main process only
+  ! this is executed by main process only
   if(myrank == 0) then
 
     ! check that the gather operation went well
@@ -690,17 +706,20 @@
       close(27)
     endif
 
-    ! elapsed time since beginning of mesh generation
-    tCPU = MPI_WTIME() - time_start
-    write(IMAIN,*)
-    write(IMAIN,*) 'Elapsed time for receiver detection in seconds = ',tCPU
-    write(IMAIN,*)
-    write(IMAIN,*) 'End of receiver detection - done'
-    write(IMAIN,*)
-
   endif    ! end of section executed by main process only
 
-! main process broadcasts the results to all the slices
+  ! deallocate arrays
+  deallocate(epidist)
+  deallocate(ix_initial_guess,iy_initial_guess,iz_initial_guess)
+  deallocate(x_target,y_target,z_target)
+  deallocate(x_found,y_found,z_found)
+  deallocate(final_distance)
+  deallocate(ispec_selected_rec_all)
+  deallocate(xi_receiver_all,eta_receiver_all,gamma_receiver_all)
+  deallocate(x_found_all,y_found_all,z_found_all)
+  deallocate(final_distance_all)
+
+  ! main process broadcasts the results to all the slices
   call MPI_BCAST(nrec,1,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
 
   call MPI_BCAST(islice_selected_rec,nrec,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
@@ -718,26 +737,16 @@
   call MPI_BCAST(stbur,nrec,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
   call MPI_BCAST(nu,nrec*3*3,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
 
-  ! deallocate arrays
-  deallocate(epidist)
-  deallocate(ix_initial_guess)
-  deallocate(iy_initial_guess)
-  deallocate(iz_initial_guess)
-  deallocate(x_target)
-  deallocate(y_target)
-  deallocate(z_target)
-  deallocate(x_found)
-  deallocate(y_found)
-  deallocate(z_found)
-  deallocate(final_distance)
-  deallocate(ispec_selected_rec_all)
-  deallocate(xi_receiver_all)
-  deallocate(eta_receiver_all)
-  deallocate(gamma_receiver_all)
-  deallocate(x_found_all)
-  deallocate(y_found_all)
-  deallocate(z_found_all)
-  deallocate(final_distance_all)
+  ! elapsed time since beginning of mesh generation
+  if( myrank == 0 ) then
+    tCPU = MPI_WTIME() - time_start
+    write(IMAIN,*)
+    write(IMAIN,*) 'Elapsed time for receiver detection in seconds = ',tCPU
+    write(IMAIN,*)
+    write(IMAIN,*) 'End of receiver detection - done'
+    write(IMAIN,*)
+  endif
+  call sync_all()
 
   end subroutine locate_receivers
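
The receiver search above refines an initial GLL-point guess by a fixed number of Newton-type iterations: map the current (xi,eta,gamma) to Cartesian coordinates with recompute_jacobian, then push the remaining offset back through the inverse Jacobian (dxi = xix*dx + xiy*dy + xiz*dz and likewise for eta; gamma is kept fixed here because the receiver sits on the surface). A one-dimensional sketch of that iteration, with a hypothetical quadratic mapping whose coefficients and target are illustrative only:

  program locate_1d_demo
    implicit none
    ! 1-D analogue of the (xi,eta,gamma) iteration: invert x(xi) = c0 + c1*xi + c2*xi**2
    double precision, parameter :: c0 = 2.d0, c1 = 1.d0, c2 = 0.1d0   ! hypothetical element geometry
    double precision, parameter :: x_target = 2.55d0                  ! hypothetical receiver position
    double precision :: xi,x,xix,dx
    integer :: iter
    xi = 0.d0                          ! initial guess at the element midpoint
    do iter = 1,6                      ! fixed iteration count, as with NUM_ITER
      x   = c0 + c1*xi + c2*xi**2      ! forward mapping (role of recompute_jacobian)
      xix = 1.d0 / (c1 + 2.d0*c2*xi)   ! d(xi)/dx, the 1-D inverse Jacobian
      dx  = x_target - x
      xi  = xi + xix*dx                ! increment dxi = xix*dx
    enddo
    print *,'xi found = ',xi,' maps back to x = ',c0 + c1*xi + c2*xi**2
  end program locate_1d_demo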
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_sources.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -33,7 +33,7 @@
                             xstore,ystore,zstore, &
                             ELLIPTICITY,min_tshift_cmt_original)
 
-  use constants
+  use constants_solver
   use specfem_par,only: &
     NSOURCES,myrank, &
     tshift_cmt,theta_source,phi_source, &
@@ -149,6 +149,10 @@
   integer :: yr,jda,ho,mi
   double precision :: sec
 
+  integer,parameter :: MIDX = (NGLLX+1)/2
+  integer,parameter :: MIDY = (NGLLY+1)/2
+  integer,parameter :: MIDZ = (NGLLZ+1)/2
+
   ! get MPI starting time for all sources
   time_start = MPI_WTIME()
 
@@ -183,7 +187,8 @@
 
   ! initializes source mask
   if( SAVE_SOURCE_MASK .and. SIMULATION_TYPE == 3 ) then
-    allocate( mask_source(NGLLX,NGLLY,NGLLZ,NSPEC) )
+    allocate( mask_source(NGLLX,NGLLY,NGLLZ,NSPEC),stat=ier )
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating mask source array')
     mask_source(:,:,:,:) = 1.0_CUSTOM_REAL
   endif
 
@@ -337,14 +342,12 @@
       do ispec = 1,nspec
 
         ! exclude elements that are too far from target
-        iglob = ibool(1,1,1,ispec)
+        iglob = ibool(MIDX,MIDY,MIDZ,ispec)
         dist = dsqrt((x_target_source - dble(xstore(iglob)))**2 &
                    + (y_target_source - dble(ystore(iglob)))**2 &
                    + (z_target_source - dble(zstore(iglob)))**2)
         if(USE_DISTANCE_CRITERION .and. dist > typical_size) cycle
 
-        located_target = .true.
-
         ! define the interval in which we look for points
         if(USE_FORCE_POINT_SOURCE) then
           ! force sources will be put on an exact GLL point
@@ -370,6 +373,7 @@
           kmin = 2
           kmax = NGLLZ - 1
         endif
+        
         do k = kmin,kmax
           do j = jmin,jmax
             do i = imin,imax
@@ -385,6 +389,8 @@
                 ix_initial_guess_source = i
                 iy_initial_guess_source = j
                 iz_initial_guess_source = k
+                located_target = .true.
+                !print*,myrank,'dist:',distmin*R_EARTH/1000.d0,i,j,k,ispec
               endif
 
             enddo
@@ -409,9 +415,9 @@
       ! therefore use first element only for fictitious iterative search
       if(.not. located_target) then
         ispec_selected_source_subset(isource_in_this_subset)=1
-        ix_initial_guess_source = 2
-        iy_initial_guess_source = 2
-        iz_initial_guess_source = 2
+        ix_initial_guess_source = MIDX
+        iy_initial_guess_source = MIDY
+        iz_initial_guess_source = MIDZ
       endif
 
       ! for point sources, the location will be exactly at a GLL point
@@ -438,18 +444,13 @@
 
       else
 
-        ! use initial guess in xi, eta and gamma
-        xi = xigll(ix_initial_guess_source)
-        eta = yigll(iy_initial_guess_source)
-        gamma = zigll(iz_initial_guess_source)
-
         ! define coordinates of the control points of the element
         do ia=1,NGNOD
 
           if(iaddx(ia) == 0) then
             iax = 1
           else if(iaddx(ia) == 1) then
-            iax = (NGLLX+1)/2
+            iax = MIDX
           else if(iaddx(ia) == 2) then
             iax = NGLLX
           else
@@ -459,7 +460,7 @@
           if(iaddy(ia) == 0) then
             iay = 1
           else if(iaddy(ia) == 1) then
-            iay = (NGLLY+1)/2
+            iay = MIDY
           else if(iaddy(ia) == 2) then
             iay = NGLLY
           else
@@ -469,7 +470,7 @@
           if(iaddr(ia) == 0) then
             iaz = 1
           else if(iaddr(ia) == 1) then
-            iaz = (NGLLZ+1)/2
+            iaz = MIDZ
           else if(iaddr(ia) == 2) then
             iaz = NGLLZ
           else
@@ -483,6 +484,11 @@
 
         enddo
 
+        ! use initial guess in xi, eta and gamma
+        xi = xigll(ix_initial_guess_source)
+        eta = yigll(iy_initial_guess_source)
+        gamma = zigll(iz_initial_guess_source)
+
         ! iterate to solve the non linear system
         do iter_loop = 1,NUM_ITER
 
@@ -494,12 +500,17 @@
           dx = - (x - x_target_source)
           dy = - (y - y_target_source)
           dz = - (z - z_target_source)
-
+          
           ! compute increments
           dxi  = xix*dx + xiy*dy + xiz*dz
           deta = etax*dx + etay*dy + etaz*dz
-          dgamma = gammax*dx + gammay*dy + gammaz*dz
+          dgamma =  gammax*dx + gammay*dy + gammaz*dz
 
+          ! impose limit on increments
+          if( abs(dxi) > 0.3d0 ) dxi = sign(1.0d0,dxi)*0.3d0
+          if( abs(deta) > 0.3d0 ) deta = sign(1.0d0,deta)*0.3d0
+          if( abs(dgamma) > 0.3d0 ) dgamma = sign(1.0d0,dgamma)*0.3d0
+
           ! update values
           xi = xi + dxi
           eta = eta + deta
@@ -520,8 +531,8 @@
         enddo
 
         ! compute final coordinates of point found
-        call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma, &
-                               x,y,z,xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+        call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
+                               xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
 
         ! store xi,eta,gamma and x,y,z of point found
         xi_source_subset(isource_in_this_subset) = xi
@@ -533,9 +544,9 @@
 
         ! compute final distance between asked and found (converted to km)
         final_distance_source_subset(isource_in_this_subset) = &
-          dsqrt((x_target_source-x_found_source(isource_in_this_subset))**2 + &
-            (y_target_source-y_found_source(isource_in_this_subset))**2 + &
-            (z_target_source-z_found_source(isource_in_this_subset))**2)*R_EARTH/1000.d0
+          dsqrt((x_target_source-x)**2 + &
+                (y_target_source-y)**2 + &
+                (z_target_source-z)**2)*R_EARTH/1000.d0
 
       endif ! USE_FORCE_POINT_SOURCE
 
@@ -800,6 +811,12 @@
   call MPI_BCAST(eta_source,NSOURCES,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
   call MPI_BCAST(gamma_source,NSOURCES,MPI_DOUBLE_PRECISION,0,MPI_COMM_WORLD,ier)
 
+  ! stores source mask
+  if( SAVE_SOURCE_MASK .and. SIMULATION_TYPE == 3 ) then
+    call save_mask_source(myrank,mask_source,NSPEC,LOCAL_TMP_PATH)
+    deallocate( mask_source )
+  endif
+
   ! elapsed time since beginning of source detection
   if(myrank == 0) then
     tCPU = MPI_WTIME() - time_start
@@ -809,13 +826,8 @@
     write(IMAIN,*) 'End of source detection - done'
     write(IMAIN,*)
   endif
-
-  ! stores source mask
-  if( SAVE_SOURCE_MASK .and. SIMULATION_TYPE == 3 ) then
-    call save_mask_source(myrank,mask_source,NSPEC,LOCAL_TMP_PATH)
-    deallocate( mask_source )
-  endif
-
+  call sync_all()
+  
   end subroutine locate_sources
 
 !
@@ -892,12 +904,16 @@
   character(len=150) :: LOCAL_TMP_PATH
 
   ! local parameters
+  integer :: ier
   character(len=150) :: prname
 
   ! stores into file
   call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_TMP_PATH)
 
-  open(unit=27,file=trim(prname)//'mask_source.bin',status='unknown',form='unformatted',action='write')
+  open(unit=27,file=trim(prname)//'mask_source.bin', &
+        status='unknown',form='unformatted',action='write',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening mask_source.bin file')
+  
   write(27) mask_source
   close(27)
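
One addition above is worth noting: the raw Newton increments in the source search are now capped at a magnitude of 0.3 per local direction, so a single step can no longer move the iterate far outside the reference element [-1,1]. A minimal sketch of that limiter with a hypothetical oversized increment:

  program clamp_increment_demo
    implicit none
    double precision :: dxi
    dxi = -1.7d0                                  ! hypothetical oversized raw increment
    if( abs(dxi) > 0.3d0 ) dxi = sign(1.0d0,dxi)*0.3d0
    print *,'clamped increment = ',dxi            ! prints -0.3
  end program clamp_increment_demo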
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/prepare_timerun.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -190,11 +190,15 @@
 
   ! check that all the mass matrices are positive
   if(OCEANS_VAL) then
-    if(minval(rmass_ocean_load) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the oceans')
+    if(minval(rmass_ocean_load) <= 0._CUSTOM_REAL) &
+      call exit_MPI(myrank,'negative mass matrix term for the oceans')
   endif
-  if(minval(rmass_crust_mantle) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the crust_mantle')
-  if(minval(rmass_inner_core) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the inner core')
-  if(minval(rmass_outer_core) <= 0.) call exit_MPI(myrank,'negative mass matrix term for the outer core')
+  if(minval(rmass_crust_mantle) <= 0._CUSTOM_REAL) &
+    call exit_MPI(myrank,'negative mass matrix term for the crust_mantle')
+  if(minval(rmass_inner_core) <= 0._CUSTOM_REAL) &
+    call exit_MPI(myrank,'negative mass matrix term for the inner core')
+  if(minval(rmass_outer_core) <= 0._CUSTOM_REAL) &
+    call exit_MPI(myrank,'negative mass matrix term for the outer core')
 
   ! for efficiency, invert final mass matrix once and for all on each slice
   if(OCEANS_VAL) rmass_ocean_load = 1._CUSTOM_REAL / rmass_ocean_load
@@ -217,97 +221,43 @@
   use specfem_par_outercore
   implicit none
 
-  ! local parameters
-  integer :: ndim_assemble
-
-  ! temporary buffers for send and receive between faces of the slices and the chunks
-  real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC) ::  &
-    buffer_send_faces_scalar,buffer_received_faces_scalar
-
-  ! synchronize all the processes before assembling the mass matrix
-  ! to make sure all the nodes have finished to read their databases
-  call sync_all()
-
   ! the mass matrix needs to be assembled with MPI here once and for all
 
   ! ocean load
   if (OCEANS_VAL) then
-    call assemble_MPI_scalar_block(myrank,rmass_ocean_load,NGLOB_CRUST_MANTLE, &
-            iproc_xi,iproc_eta,ichunk,addressing, &
-            iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
-            npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
-            iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
-            iprocfrom_faces,iprocto_faces,imsg_type, &
-            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
-            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
-            NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
-            NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_XY,NCHUNKS_VAL)
+    call assemble_MPI_scalar_ext_mesh(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+                        rmass_ocean_load, &
+                        num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+                        nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,&
+                        my_neighbours_crust_mantle)
   endif
 
   ! crust and mantle
-  call assemble_MPI_scalar_block(myrank,rmass_crust_mantle,NGLOB_CRUST_MANTLE, &
-            iproc_xi,iproc_eta,ichunk,addressing, &
-            iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
-            npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
-            iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
-            iprocfrom_faces,iprocto_faces,imsg_type, &
-            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
-            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
-            NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
-            NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_XY,NCHUNKS_VAL)
+  call assemble_MPI_scalar_ext_mesh(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+                        rmass_crust_mantle, &
+                        num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
+                        nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,&
+                        my_neighbours_crust_mantle)
 
   ! outer core
-  call assemble_MPI_scalar_block(myrank,rmass_outer_core,NGLOB_OUTER_CORE, &
-            iproc_xi,iproc_eta,ichunk,addressing, &
-            iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
-            npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
-            iboolfaces_outer_core,iboolcorner_outer_core, &
-            iprocfrom_faces,iprocto_faces,imsg_type, &
-            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
-            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
-            NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
-            NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE),NGLOB2DMAX_XY,NCHUNKS_VAL)
+  call assemble_MPI_scalar_ext_mesh(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+                        rmass_outer_core, &
+                        num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
+                        nibool_interfaces_outer_core,ibool_interfaces_outer_core,&
+                        my_neighbours_outer_core)
 
   ! inner core
-  call assemble_MPI_scalar_block(myrank,rmass_inner_core,NGLOB_INNER_CORE, &
-            iproc_xi,iproc_eta,ichunk,addressing, &
-            iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
-            npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
-            iboolfaces_inner_core,iboolcorner_inner_core, &
-            iprocfrom_faces,iprocto_faces,imsg_type, &
-            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
-            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
-            NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_INNER_CORE), &
-            NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE),NGLOB2DMAX_XY,NCHUNKS_VAL)
+  call assemble_MPI_scalar_ext_mesh(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+                        rmass_inner_core, &
+                        num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
+                        nibool_interfaces_inner_core,ibool_interfaces_inner_core,&
+                        my_neighbours_inner_core)
 
-
   ! mass matrix including central cube
   if(INCLUDE_CENTRAL_CUBE) then
-    ! the mass matrix to assemble is a scalar, not a vector
-    ndim_assemble = 1
-
-    ! use central cube buffers to assemble the inner core mass matrix with the central cube
-    call assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
-                 npoin2D_cube_from_slices, buffer_all_cube_from_slices, &
-                 buffer_slices, buffer_slices2, ibool_central_cube, &
-                 receiver_cube_from_slices, ibool_inner_core, &
-                 idoubling_inner_core, NSPEC_INNER_CORE, &
-                 ibelm_bottom_inner_core, NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
-                 NGLOB_INNER_CORE, &
-                 rmass_inner_core,ndim_assemble, &
-                 iproc_eta,addressing,NCHUNKS_VAL,NPROC_XI_VAL,NPROC_ETA_VAL)
-
     ! suppress fictitious mass matrix elements in central cube
     ! because the slices do not compute all their spectral elements in the cube
-    where(rmass_inner_core(:) <= 0.) rmass_inner_core = 1.
+    where(rmass_inner_core(:) <= 0.0_CUSTOM_REAL) rmass_inner_core = 1.0_CUSTOM_REAL
   endif
 
   call sync_all()
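
After assembly the mass matrices are post-processed as before: any non-positive entry left in the inner-core matrix by the fictitious central-cube elements is reset to one, and each matrix is inverted once and for all (as the earlier hunk shows for rmass_ocean_load) so the time loop only multiplies. A minimal sketch of those two steps on hypothetical values:

  program mass_matrix_demo
    implicit none
    ! hypothetical assembled mass-matrix values; 0 and -1 stand for fictitious entries
    double precision, dimension(5) :: rmass = (/ 2.d0, 4.d0, 0.d0, 5.d0, -1.d0 /)
    ! suppress fictitious entries, then invert once so the time loop can multiply
    where( rmass(:) <= 0.d0 ) rmass = 1.d0
    rmass = 1.d0 / rmass
    print *,rmass   ! 0.5  0.25  1.0  0.2  1.0
  end program mass_matrix_demo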

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -288,9 +288,11 @@
 !---- a given slice can belong to at most one corner
   icount_corners = 0
   do imsg = 1,NCORNERSCHUNKS
+    ! if only two chunks then there is no second worker
     if(myrank == iproc_master_corners(imsg) .or. &
          myrank == iproc_worker1_corners(imsg) .or. &
-         myrank == iproc_worker2_corners(imsg)) then
+         (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) then
+
       icount_corners = icount_corners + 1
       if(icount_corners>1 .and. (NPROC_XI > 1 .or. NPROC_ETA > 1)) then
         print*,'error ',myrank,'icount_corners:',icount_corners
@@ -304,7 +306,7 @@
         write(filename,"('buffer_corners_chunks_master_msg',i6.6,'.txt')") imsg
       else if(myrank == iproc_worker1_corners(imsg)) then
         write(filename,"('buffer_corners_chunks_worker1_msg',i6.6,'.txt')") imsg
-      else if(myrank == iproc_worker2_corners(imsg)) then
+      else if( NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg)) then
         write(filename,"('buffer_corners_chunks_worker2_msg',i6.6,'.txt')") imsg
       endif
 
@@ -314,13 +316,19 @@
       if( ier /= 0 ) call exit_MPI(myrank,'error opening buffer_corners_chunks file')
 
       read(IIN,*) npoin1D_corner
-      if(npoin1D_corner /= NGLOB1D_RADIAL) &
+      if(npoin1D_corner /= NGLOB1D_RADIAL) then
+        print*,'error ',myrank,' npoin1D_corner: ',npoin1D_corner,'NGLOB1D_RADIAL:',NGLOB1D_RADIAL
+        print*,'iregion_code:',iregion_code
         call exit_MPI(myrank,'incorrect nb of points in corner buffer')
+      endif
       do ipoin1D = 1,npoin1D_corner
         read(IIN,*) iboolcorner(ipoin1D,icount_corners),xdummy,ydummy,zdummy
       enddo
       close(IIN)
+
     endif
+
+
   enddo
 
   endif

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_solver.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_solver.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -36,7 +36,7 @@
               c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
               c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
               ibool,idoubling,ispec_is_tiso, &
-              is_on_a_slice_edge,rmass,rmass_ocean_load,nspec,nglob, &
+              rmass,rmass_ocean_load,nspec,nglob, &
               READ_KAPPA_MU,READ_TISO,TRANSVERSE_ISOTROPY, &
               ANISOTROPIC_3D_MANTLE,ANISOTROPIC_INNER_CORE,OCEANS,LOCAL_PATH,ABSORBING_CONDITIONS)
 
@@ -48,7 +48,7 @@
 
   integer iregion_code,myrank
 
-! flags to know if we should read Vs and anisotropy arrays
+  ! flags to know if we should read Vs and anisotropy arrays
   logical READ_KAPPA_MU,READ_TISO,TRANSVERSE_ISOTROPY,ANISOTROPIC_3D_MANTLE, &
     ANISOTROPIC_INNER_CORE,OCEANS,ABSORBING_CONDITIONS
 
@@ -63,47 +63,48 @@
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
     xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
 
-! material properties
+  ! material properties
   real(kind=CUSTOM_REAL) rhostore(NGLLX,NGLLY,NGLLZ,nspec_iso)
   real(kind=CUSTOM_REAL) kappavstore(NGLLX,NGLLY,NGLLZ,nspec_iso)
   real(kind=CUSTOM_REAL) muvstore(NGLLX,NGLLY,NGLLZ,nspec_iso)
 
-! additional arrays for anisotropy stored only where needed to save memory
+  ! additional arrays for anisotropy stored only where needed to save memory
   real(kind=CUSTOM_REAL) kappahstore(NGLLX,NGLLY,NGLLZ,nspec_tiso)
   real(kind=CUSTOM_REAL) muhstore(NGLLX,NGLLY,NGLLZ,nspec_tiso)
   real(kind=CUSTOM_REAL) eta_anisostore(NGLLX,NGLLY,NGLLZ,nspec_tiso)
 
-! additional arrays for full anisotropy
+  ! additional arrays for full anisotropy
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec_ani) :: &
     c11store,c12store,c13store,c14store,c15store,c16store, &
     c22store,c23store,c24store,c25store,c26store,c33store,c34store, &
     c35store,c36store,c44store,c45store,c46store,c55store,c56store,c66store
 
-! Stacey
+  ! Stacey
   real(kind=CUSTOM_REAL) rho_vp(NGLLX,NGLLY,NGLLZ,nspec)
   real(kind=CUSTOM_REAL) rho_vs(NGLLX,NGLLY,NGLLZ,nspec)
 
-! mass matrix and additional ocean load mass matrix
+  ! mass matrix and additional ocean load mass matrix
   real(kind=CUSTOM_REAL), dimension(nglob) :: rmass,rmass_ocean_load
 
-! global addressing
+  ! global addressing
   integer ibool(NGLLX,NGLLY,NGLLZ,nspec)
 
   integer, dimension(nspec) :: idoubling
 
-! this for non blocking MPI
-  logical, dimension(nspec) :: is_on_a_slice_edge
-
   logical, dimension(nspec) :: ispec_is_tiso
 
-! processor identification
+  ! local parameters
+  integer :: ier
+  logical,dimension(nspec) :: dummy_l
+  ! processor identification
   character(len=150) prname
-
-! create the name for the database of the current slice and region
+  
+  ! create the name for the database of the current slice and region
   call create_name_database(prname,myrank,iregion_code,LOCAL_PATH)
 
   open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_1.bin', &
-        status='old',action='read',form='unformatted')
+        status='old',action='read',form='unformatted',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_1.bin')
 
   read(IIN) xix
   read(IIN) xiy
@@ -115,14 +116,13 @@
   read(IIN) gammay
   read(IIN) gammaz
 
-! model arrays
+  ! model arrays
   read(IIN) rhostore
   read(IIN) kappavstore
 
   if(READ_KAPPA_MU) read(IIN) muvstore
 
-! for anisotropy, gravity and rotation
-
+  ! for anisotropy, gravity and rotation
   if(TRANSVERSE_ISOTROPY .and. READ_TISO) then
     read(IIN) kappahstore
     read(IIN) muhstore
@@ -161,7 +161,7 @@
     read(IIN) c66store
   endif
 
-! Stacey
+  ! Stacey
   if(ABSORBING_CONDITIONS) then
 
     if(iregion_code == IREGION_CRUST_MANTLE) then
@@ -173,18 +173,19 @@
 
   endif
 
-! mass matrix
+  ! mass matrix
   read(IIN) rmass
 
-! read additional ocean load mass matrix
+  ! read additional ocean load mass matrix
   if(OCEANS .and. iregion_code == IREGION_CRUST_MANTLE) read(IIN) rmass_ocean_load
 
   close(IIN)
 
-! read coordinates of the mesh
-
+  ! read coordinates of the mesh
   open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_2.bin', &
-       status='old',action='read',form='unformatted')
+       status='old',action='read',form='unformatted',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening file solver_data_2.bin')
+  
   read(IIN) xstore
   read(IIN) ystore
   read(IIN) zstore
@@ -193,8 +194,8 @@
 
   read(IIN) idoubling
 
-  read(IIN) is_on_a_slice_edge
-
+  read(IIN) dummy_l ! is_on_a_slice_edge
+  
   read(IIN) ispec_is_tiso
 
   close(IIN)
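
Since the solver no longer needs is_on_a_slice_edge, the corresponding record in solver_data_2.bin is now read into a throw-away array rather than being dropped from the file, which leaves the record layout written by the mesher untouched. A minimal sketch of that skip-a-record idiom on a hypothetical two-record file (file name and contents are made up):

  program skip_record_demo
    implicit none
    integer :: ier
    logical, dimension(4) :: dummy_l
    integer, dimension(4) :: wanted
    ! write a small unformatted file with a similar two-record layout (demo data only)
    open(unit=11,file='demo_solver_data.bin',form='unformatted',status='replace',iostat=ier)
    if( ier /= 0 ) stop 'error creating demo file'
    write(11) (/ .true., .false., .true., .false. /)   ! plays the role of is_on_a_slice_edge
    write(11) (/ 1, 2, 3, 4 /)                         ! record that is actually needed
    close(11)
    ! read it back: the unwanted record must still be consumed to stay aligned
    open(unit=11,file='demo_solver_data.bin',form='unformatted',status='old',iostat=ier)
    if( ier /= 0 ) stop 'error opening demo file'
    read(11) dummy_l          ! contents discarded, as with the is_on_a_slice_edge record
    read(11) wanted
    close(11,status='delete')
    print *,'second record read back: ',wanted
  end program skip_record_demo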

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_mesh_databases.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -35,19 +35,9 @@
 
   include 'mpif.h'
 
-  ! local parameters
-  integer :: ier
-
   ! get MPI starting time
   time_start = MPI_WTIME()
 
-  ! allocates temporary arrays
-  allocate( is_on_a_slice_edge_crust_mantle(NSPEC_CRUST_MANTLE), &
-           is_on_a_slice_edge_inner_core(NSPEC_INNER_CORE), &
-           is_on_a_slice_edge_outer_core(NSPEC_OUTER_CORE), &
-           stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating temporary is_on_a_slice_edge arrays')
-
   ! start reading the databases
   ! read arrays created by the mesher
 
@@ -66,15 +56,9 @@
   ! reads "addressing.txt" 2-D addressing for summation between slices with MPI
   call read_mesh_databases_addressing()
 
-  ! reads "iboolleft_..txt", "iboolright_..txt" (and "list_messages_..txt", "buffer_...txt") files and sets up MPI buffers
-  call read_mesh_databases_MPIbuffers()
+  ! sets up MPI interfaces, inner/outer elements and mesh coloring
+  call read_mesh_databases_MPI()
 
-  ! sets up MPI interfaces
-  call read_mesh_databases_MPIinter()
-
-  ! sets up inner/outer element arrays
-  call read_mesh_databases_InnerOuter()
-
   ! absorbing boundaries
   if(ABSORBING_CONDITIONS) then
     ! reads "stacey.bin" files and sets up arrays for Stacey conditions
@@ -91,11 +75,6 @@
     write(IMAIN,*)
   endif
 
-  ! frees temporary allocated arrays
-  deallocate(is_on_a_slice_edge_crust_mantle, &
-            is_on_a_slice_edge_outer_core, &
-            is_on_a_slice_edge_inner_core)
-
   end subroutine read_mesh_databases
 
 !
@@ -158,7 +137,7 @@
             c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle, &
             ibool_crust_mantle,dummy_i, &
             ispec_is_tiso_crust_mantle, &
-            is_on_a_slice_edge_crust_mantle,rmass_crust_mantle,rmass_ocean_load, &
+            rmass_crust_mantle,rmass_ocean_load, &
             NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
             READ_KAPPA_MU,READ_TISO,TRANSVERSE_ISOTROPY_VAL,ANISOTROPIC_3D_MANTLE_VAL, &
             ANISOTROPIC_INNER_CORE_VAL,OCEANS_VAL,LOCAL_PATH,ABSORBING_CONDITIONS)
@@ -222,7 +201,7 @@
             dummy_array,dummy_array,dummy_array, &
             dummy_array,dummy_array,dummy_array, &
             ibool_outer_core,dummy_idoubling_outer_core,dummy_ispec_is_tiso, &
-            is_on_a_slice_edge_outer_core,rmass_outer_core,rmass_ocean_load, &
+            rmass_outer_core,rmass_ocean_load, &
             NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
             READ_KAPPA_MU,READ_TISO,TRANSVERSE_ISOTROPY_VAL,ANISOTROPIC_3D_MANTLE_VAL, &
             ANISOTROPIC_INNER_CORE_VAL,OCEANS_VAL,LOCAL_PATH,ABSORBING_CONDITIONS)
@@ -289,7 +268,7 @@
             c44store_inner_core,dummy_array,dummy_array, &
             dummy_array,dummy_array,dummy_array, &
             ibool_inner_core,idoubling_inner_core,dummy_ispec_is_tiso, &
-            is_on_a_slice_edge_inner_core,rmass_inner_core,rmass_ocean_load, &
+            rmass_inner_core,rmass_ocean_load, &
             NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
             READ_KAPPA_MU,READ_TISO,TRANSVERSE_ISOTROPY_VAL,ANISOTROPIC_3D_MANTLE_VAL, &
             ANISOTROPIC_INNER_CORE_VAL,OCEANS_VAL,LOCAL_PATH,ABSORBING_CONDITIONS)
@@ -320,15 +299,18 @@
   include 'mpif.h'
 
   ! local parameters
-  integer njunk1,njunk2,njunk3
-
+  integer :: njunk1,njunk2,njunk3
+  integer :: ier
+  
   ! crust and mantle
   ! create name of database
   call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
 
   ! Stacey put back
   open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
-        status='old',form='unformatted',action='read')
+        status='old',form='unformatted',action='read',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening crust_mantle boundary.bin file')
+  
   read(27) nspec2D_xmin_crust_mantle
   read(27) nspec2D_xmax_crust_mantle
   read(27) nspec2D_ymin_crust_mantle
@@ -371,7 +353,9 @@
 
   ! Stacey put back
   open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
-        status='old',form='unformatted',action='read')
+        status='old',form='unformatted',action='read',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening outer_core boundary.bin file')
+  
   read(27) nspec2D_xmin_outer_core
   read(27) nspec2D_xmax_outer_core
   read(27) nspec2D_ymin_outer_core
@@ -413,7 +397,9 @@
 
   ! read info for vertical edges for central cube matching in inner core
   open(unit=27,file=prname(1:len_trim(prname))//'boundary.bin', &
-        status='old',form='unformatted',action='read')
+        status='old',form='unformatted',action='read',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening inner_core boundary.bin file')
+  
   read(27) nspec2D_xmin_inner_core
   read(27) nspec2D_xmax_inner_core
   read(27) nspec2D_ymin_inner_core
@@ -437,7 +423,9 @@
     call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
 
     open(unit=27,file=prname(1:len_trim(prname))//'boundary_disc.bin', &
-          status='old',form='unformatted',action='read')
+          status='old',form='unformatted',action='read',iostat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error opening boundary_disc.bin file')
+  
     read(27) njunk1,njunk2,njunk3
     if (njunk1 /= NSPEC2D_MOHO .and. njunk2 /= NSPEC2D_400 .and. njunk3 /= NSPEC2D_670) &
                call exit_mpi(myrank, 'Error reading ibelm_disc.bin file')
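
Note: every open() touched in the hunks above now follows the same checked-open pattern, passing iostat= and aborting with a readable message instead of letting the runtime kill the job. A minimal stand-alone sketch of that pattern (unit number and file name are placeholders, and a plain stop stands in for the solver's exit_mpi call):

  program checked_open
    implicit none
    integer, parameter :: IIN = 27
    integer :: ier
    ! open the unformatted database file and trap a missing or unreadable file
    open(unit=IIN, file='proc000000_reg1_boundary.bin', status='old', &
         form='unformatted', action='read', iostat=ier)
    if (ier /= 0) stop 'error opening boundary.bin file'   ! the solver calls exit_mpi(myrank,...) here
    close(IIN)
  end program checked_open
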
@@ -479,15 +467,17 @@
 
   ! local parameters
   integer :: ier,iproc,iproc_read
-  integer :: NUM_FACES,NPROC_ONE_DIRECTION
 
   ! open file with global slice number addressing
   if(myrank == 0) then
     open(unit=IIN,file=trim(OUTPUT_FILES)//'/addressing.txt',status='old',action='read',iostat=ier)
     if( ier /= 0 ) call exit_mpi(myrank,'error opening addressing.txt')
+    
     do iproc = 0,NPROCTOT_VAL-1
       read(IIN,*) iproc_read,ichunk,iproc_xi,iproc_eta
+      
       if(iproc_read /= iproc) call exit_MPI(myrank,'incorrect slice number read')
+      
       addressing(ichunk,iproc_xi,iproc_eta) = iproc
       ichunk_slice(iproc) = ichunk
       iproc_xi_slice(iproc) = iproc_xi
@@ -503,7 +493,7 @@
   call MPI_BCAST(iproc_eta_slice,NPROCTOT_VAL,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
 
   ! output a topology map of slices - fix 20x by nproc
-  if (myrank == 0 .and. NCHUNKS_VAL == 6) then
+  if (myrank == 0 .and. NCHUNKS_VAL == 6 .and. NPROCTOT_VAL < 1000 ) then
     write(IMAIN,*) 'Spatial distribution of the slices'
     do iproc_xi = NPROC_XI_VAL-1, 0, -1
       write(IMAIN,'(20x)',advance='no')
@@ -554,363 +544,35 @@
   endif
 
   ! determine chunk number and local slice coordinates using addressing
+  ! (needed for stacey conditions)
   ichunk = ichunk_slice(myrank)
   iproc_xi = iproc_xi_slice(myrank)
   iproc_eta = iproc_eta_slice(myrank)
 
-  ! define maximum size for message buffers
-  ! use number of elements found in the mantle since it is the largest region
-  NGLOB2DMAX_XY = max(NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE))
-
-  ! number of corners and faces shared between chunks and number of message types
-  if(NCHUNKS_VAL == 1 .or. NCHUNKS_VAL == 2) then
-    NCORNERSCHUNKS = 1
-    NUM_FACES = 1
-    NUM_MSG_TYPES = 1
-  else if(NCHUNKS_VAL == 3) then
-    NCORNERSCHUNKS = 1
-    NUM_FACES = 1
-    NUM_MSG_TYPES = 3
-  else if(NCHUNKS_VAL == 6) then
-    NCORNERSCHUNKS = 8
-    NUM_FACES = 4
-    NUM_MSG_TYPES = 3
-  else
-    call exit_MPI(myrank,'number of chunks must be either 1, 2, 3 or 6')
-  endif
-  ! if more than one chunk then same number of processors in each direction
-  NPROC_ONE_DIRECTION = NPROC_XI_VAL
-  ! total number of messages corresponding to these common faces
-  NUMMSGS_FACES = NPROC_ONE_DIRECTION*NUM_FACES*NUM_MSG_TYPES
-
-  ! debug checks with compiled value
-  !if( NUMMSGS_FACES /= NUMMSGS_FACES_VAL ) then
-  !  print*,'check: NUMMSGS_FACES',NUMMSGS_FACES,NUMMSGS_FACES_VAL
-  !  call exit_mpi(myrank,'error NUMMSGS_FACES_VAL, please recompile solver')
-  !endif
-
   end subroutine read_mesh_databases_addressing
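
The addressing step above keeps the usual master-reads-then-broadcasts layout: rank 0 parses addressing.txt line by line, checks the slice numbers, and MPI_BCAST distributes the filled tables to all other ranks. A reduced, runnable sketch of that layout (the table and its contents are made up, not the solver's):

  program bcast_addressing
    implicit none
    include 'mpif.h'
    integer :: myrank, nproc, ier, iproc
    integer, allocatable :: ichunk_slice(:)
    call MPI_INIT(ier)
    call MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ier)
    call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ier)
    allocate(ichunk_slice(0:nproc-1))
    if (myrank == 0) then
      ! rank 0 alone would read the ASCII table here; we fake one entry per slice
      do iproc = 0, nproc-1
        ichunk_slice(iproc) = mod(iproc, 6) + 1
      enddo
    endif
    ! every other rank receives the filled table in a single collective call
    call MPI_BCAST(ichunk_slice, nproc, MPI_INTEGER, 0, MPI_COMM_WORLD, ier)
    if (myrank /= 0) print *, 'rank', myrank, 'is in chunk', ichunk_slice(myrank)
    call MPI_FINALIZE(ier)
  end program bcast_addressing
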
 
+
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine read_mesh_databases_MPIbuffers()
-
+  subroutine read_mesh_databases_MPI()
+  
   use specfem_par
   use specfem_par_crustmantle
   use specfem_par_innercore
-  use specfem_par_outercore
+  use specfem_par_outercore  
   implicit none
 
   ! local parameters
+  real :: percentage_edge
   integer :: ier
-  !character(len=150) :: filename
 
-  ! read 2-D addressing for summation between slices with MPI
+  ! read MPI interfaces from file
 
-  ! mantle and crust
-  if(myrank == 0) write(IMAIN,*) 'crust/mantle region:'
-
-  call read_arrays_buffers_solver(IREGION_CRUST_MANTLE,myrank,iboolleft_xi_crust_mantle, &
-     iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
-     npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
-     iprocfrom_faces,iprocto_faces,imsg_type, &
-     iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-     iboolfaces_crust_mantle,npoin2D_faces_crust_mantle, &
-     iboolcorner_crust_mantle, &
-     NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE), &
-     NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_XY,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
-     NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT_VAL,NPROC_XI_VAL,NPROC_ETA_VAL,LOCAL_PATH,NCHUNKS_VAL)
-
-  ! outer core
-  if(myrank == 0) write(IMAIN,*) 'outer core region:'
-
-  call read_arrays_buffers_solver(IREGION_OUTER_CORE,myrank, &
-     iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
-     npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
-     iprocfrom_faces,iprocto_faces,imsg_type, &
-     iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-     iboolfaces_outer_core,npoin2D_faces_outer_core, &
-     iboolcorner_outer_core, &
-     NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE), &
-     NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE),NGLOB2DMAX_XY,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
-     NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT_VAL,NPROC_XI_VAL,NPROC_ETA_VAL,LOCAL_PATH,NCHUNKS_VAL)
-
-  ! inner core
-  if(myrank == 0) write(IMAIN,*) 'inner core region:'
-
-  call read_arrays_buffers_solver(IREGION_INNER_CORE,myrank, &
-     iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
-     npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
-     iprocfrom_faces,iprocto_faces,imsg_type, &
-     iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-     iboolfaces_inner_core,npoin2D_faces_inner_core, &
-     iboolcorner_inner_core, &
-     NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE), &
-     NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE),NGLOB2DMAX_XY,NGLOB1D_RADIAL(IREGION_INNER_CORE), &
-     NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT_VAL,NPROC_XI_VAL,NPROC_ETA_VAL,LOCAL_PATH,NCHUNKS_VAL)
-
-  ! added this to reduce the size of the buffers
-  ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
-  npoin2D_max_all_CM_IC = max(maxval(npoin2D_xi_crust_mantle(:) + npoin2D_xi_inner_core(:)), &
-                        maxval(npoin2D_eta_crust_mantle(:) + npoin2D_eta_inner_core(:)))
-
-  allocate(buffer_send_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED), &
-          buffer_received_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED),stat=ier)
-  if( ier /= 0 ) call exit_MPI(myrank,'error allocating mpi buffer')
-
-  allocate(b_buffer_send_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED), &
-          b_buffer_received_faces(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED),stat=ier)
-  if( ier /= 0 ) call exit_MPI(myrank,'error allocating mpi b_buffer')
-
-  ! central cube buffers
-  if(INCLUDE_CENTRAL_CUBE) then
-
-    if(myrank == 0) write(IMAIN,*) 'including central cube'
-
-    ! compute number of messages to expect in cube as well as their size
-    call comp_central_cube_buffer_size(iproc_xi,iproc_eta,ichunk, &
-                NPROC_XI_VAL,NPROC_ETA_VAL,NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
-                nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
-
-    ! this value is used for dynamic memory allocation, therefore make sure it is never zero
-    if(nb_msgs_theor_in_cube > 0) then
-      non_zero_nb_msgs_theor_in_cube = nb_msgs_theor_in_cube
-    else
-      non_zero_nb_msgs_theor_in_cube = 1
-    endif
-
-    ! allocate buffers for cube and slices
-    allocate(sender_from_slices_to_cube(non_zero_nb_msgs_theor_in_cube), &
-            buffer_all_cube_from_slices(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM), &
-            b_buffer_all_cube_from_slices(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM), &
-            buffer_slices(npoin2D_cube_from_slices,NDIM), &
-            b_buffer_slices(npoin2D_cube_from_slices,NDIM), &
-            buffer_slices2(npoin2D_cube_from_slices,NDIM), &
-            ibool_central_cube(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices),stat=ier)
-    if( ier /= 0 ) call exit_MPI(myrank,'error allocating cube buffers')
-
-    ! handles the communications with the central cube if it was included in the mesh
-    ! create buffers to assemble with the central cube
-    call create_central_cube_buffers(myrank,iproc_xi,iproc_eta,ichunk, &
-               NPROC_XI_VAL,NPROC_ETA_VAL,NCHUNKS_VAL,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-               NSPEC2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NSPEC2DMAX_YMIN_YMAX(IREGION_INNER_CORE), &
-               NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
-               addressing,ibool_inner_core,idoubling_inner_core, &
-               xstore_inner_core,ystore_inner_core,zstore_inner_core, &
-               nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
-               nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
-               ibelm_xmin_inner_core,ibelm_xmax_inner_core, &
-               ibelm_ymin_inner_core,ibelm_ymax_inner_core,ibelm_bottom_inner_core, &
-               nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices, &
-               receiver_cube_from_slices,sender_from_slices_to_cube,ibool_central_cube, &
-               buffer_slices,buffer_slices2,buffer_all_cube_from_slices)
-
-    if(myrank == 0) write(IMAIN,*) ''
-
-  else
-
-    ! allocate fictitious buffers for cube and slices with a dummy size
-    ! just to be able to use them as arguments in subroutine calls
-    allocate(sender_from_slices_to_cube(1), &
-            buffer_all_cube_from_slices(1,1,1), &
-            b_buffer_all_cube_from_slices(1,1,1), &
-            buffer_slices(1,1), &
-            b_buffer_slices(1,1), &
-            buffer_slices2(1,1), &
-            ibool_central_cube(1,1),stat=ier)
-    if( ier /= 0 ) call exit_MPI(myrank,'error allocating dummy buffers')
-
-  endif
-
-  ! note: fix_... routines below update is_on_a_slice_edge_.. arrays:
-  !          assign flags for each element which is on a rim of the slice
-  !          thus, they include elements on top and bottom not shared with other MPI partitions
-  !
-  !          we will re-set these flags when setting up inner/outer elements, but will
-  !          use these arrays for now as initial guess for the search for elements which share a global point
-  !          between different MPI processes
-  call fix_non_blocking_slices(is_on_a_slice_edge_crust_mantle,iboolright_xi_crust_mantle, &
-         iboolleft_xi_crust_mantle,iboolright_eta_crust_mantle,iboolleft_eta_crust_mantle, &
-         npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle,ibool_crust_mantle, &
-         mask_ibool,NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,NGLOB2DMAX_XMIN_XMAX_CM,NGLOB2DMAX_YMIN_YMAX_CM)
-
-  call fix_non_blocking_slices(is_on_a_slice_edge_outer_core,iboolright_xi_outer_core, &
-         iboolleft_xi_outer_core,iboolright_eta_outer_core,iboolleft_eta_outer_core, &
-         npoin2D_xi_outer_core,npoin2D_eta_outer_core,ibool_outer_core, &
-         mask_ibool,NSPEC_OUTER_CORE,NGLOB_OUTER_CORE,NGLOB2DMAX_XMIN_XMAX_OC,NGLOB2DMAX_YMIN_YMAX_OC)
-
-  call fix_non_blocking_slices(is_on_a_slice_edge_inner_core,iboolright_xi_inner_core, &
-         iboolleft_xi_inner_core,iboolright_eta_inner_core,iboolleft_eta_inner_core, &
-         npoin2D_xi_inner_core,npoin2D_eta_inner_core,ibool_inner_core, &
-         mask_ibool,NSPEC_INNER_CORE,NGLOB_INNER_CORE,NGLOB2DMAX_XMIN_XMAX_IC,NGLOB2DMAX_YMIN_YMAX_IC)
-
-  if(INCLUDE_CENTRAL_CUBE) then
-    ! updates flags for elements on slice boundaries
-    call fix_non_blocking_central_cube(is_on_a_slice_edge_inner_core, &
-         ibool_inner_core,NSPEC_INNER_CORE,NGLOB_INNER_CORE,nb_msgs_theor_in_cube,ibelm_bottom_inner_core, &
-         idoubling_inner_core,npoin2D_cube_from_slices, &
-         ibool_central_cube,NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
-         ichunk,NPROC_XI_VAL)
-  endif
-
-  ! debug: saves element flags
   ! crust mantle
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_crust_mantle_proc',myrank
-  !call write_VTK_data_elem_l(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
-  !                          xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
-  !                          ibool_crust_mantle, &
-  !                          is_on_a_slice_edge_crust_mantle,filename)
-  ! outer core
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_outer_core_proc',myrank
-  !call write_VTK_data_elem_l(NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
-  !                          xstore_outer_core,ystore_outer_core,zstore_outer_core, &
-  !                          ibool_outer_core, &
-  !                          is_on_a_slice_edge_outer_core,filename)
-  ! inner core
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_is_on_a_slice_edge_inner_core_proc',myrank
-  !call write_VTK_data_elem_l(NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-  !                          xstore_inner_core,ystore_inner_core,zstore_inner_core, &
-  !                          ibool_inner_core, &
-  !                          is_on_a_slice_edge_inner_core,filename)
+  call read_mesh_databases_MPI_CM()
 
-  end subroutine read_mesh_databases_MPIbuffers
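
The buffer sizing rule that disappears with this routine summed the crust-mantle and inner-core face counts before taking the maximum, because both regions are packed into the same MPI message. With made-up counts (all numbers below are purely illustrative) the rule works out as follows:

  program buffer_sizing
    implicit none
    integer, parameter :: NDIM = 3, NUMFACES_SHARED = 4
    ! illustrative 2-D boundary point counts per message direction
    integer :: npoin2D_xi_cm(2)  = (/ 2534, 2534 /)
    integer :: npoin2D_eta_cm(2) = (/ 2486, 2486 /)
    integer :: npoin2D_xi_ic(2)  = (/ 246, 246 /)
    integer :: npoin2D_eta_ic(2) = (/ 222, 222 /)
    integer :: npoin2D_max_all_CM_IC
    ! both regions travel in the same message, so their counts are summed first
    npoin2D_max_all_CM_IC = max( maxval(npoin2D_xi_cm  + npoin2D_xi_ic), &
                                 maxval(npoin2D_eta_cm + npoin2D_eta_ic) )
    print *, 'face buffer shape:', NDIM, 'x', npoin2D_max_all_CM_IC, 'x', NUMFACES_SHARED   ! 3 x 2780 x 4
  end program buffer_sizing
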
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine read_mesh_databases_MPIinter()
-
-! sets up interfaces between MPI processes
-
-  use specfem_par
-  use specfem_par_crustmantle
-  use specfem_par_innercore
-  use specfem_par_outercore
-  implicit none
-
-  include 'mpif.h'
-
-  ! local parameters
-  integer :: ier,ndim_assemble
-
-  ! temporary buffers for send and receive between faces of the slices and the chunks
-  real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC) ::  &
-    buffer_send_faces_scalar,buffer_received_faces_scalar
-
-  ! assigns initial maximum arrays
-  ! for global slices, maximum number of neighbor is around 17 ( 8 horizontal, max of 8 on bottom )
-  integer, parameter :: MAX_NEIGHBOURS = 8 + NCORNERSCHUNKS_VAL
-  integer, dimension(MAX_NEIGHBOURS) :: my_neighbours,nibool_neighbours
-  integer, dimension(:,:),allocatable :: ibool_neighbours
-  integer :: max_nibool
-  real(kind=CUSTOM_REAL),dimension(:),allocatable :: test_flag
-  integer,dimension(:),allocatable :: dummy_i
-  integer :: i,j,k,ispec,iglob
-  !daniel: debug
-  !character(len=150) :: filename
-
-  ! estimates initial maximum ibool array
-  max_nibool = npoin2D_max_all_CM_IC * NUMFACES_SHARED &
-               + non_zero_nb_msgs_theor_in_cube*npoin2D_cube_from_slices
-
-  allocate(ibool_neighbours(max_nibool,MAX_NEIGHBOURS), stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating ibool_neighbours')
-
-
-! sets up MPI interfaces
-! crust mantle region
-  if( myrank == 0 ) write(IMAIN,*) 'crust mantle mpi:'
-  allocate(test_flag(NGLOB_CRUST_MANTLE), &
-          stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag')
-
-  ! sets flag to rank id (+1 to avoid problems with zero rank)
-  test_flag(:) = myrank + 1.0
-
-  ! assembles values
-  call assemble_MPI_scalar_block(myrank,test_flag, &
-            NGLOB_CRUST_MANTLE, &
-            iproc_xi,iproc_eta,ichunk,addressing, &
-            iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
-            npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
-            iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
-            iprocfrom_faces,iprocto_faces,imsg_type, &
-            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
-            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
-            NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_CRUST_MANTLE), &
-            NGLOB2DMAX_XMIN_XMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_YMIN_YMAX(IREGION_CRUST_MANTLE),NGLOB2DMAX_XY,NCHUNKS_VAL)
-
-  ! removes own myrank id (+1)
-  test_flag(:) = test_flag(:) - ( myrank + 1.0)
-
-  ! debug: saves array
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_crust_mantle_proc',myrank
-  !call write_VTK_glob_points(NGLOB_CRUST_MANTLE, &
-  !                      xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
-  !                      test_flag,filename)
-
-  allocate(dummy_i(NSPEC_CRUST_MANTLE),stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating dummy_i')
-
-  ! determines neighbor rank for shared faces
-  call rmd_get_MPI_interfaces(myrank,NGLOB_CRUST_MANTLE,NSPEC_CRUST_MANTLE, &
-                            test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
-                            num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
-                            max_nibool,MAX_NEIGHBOURS, &
-                            ibool_crust_mantle,&
-                            is_on_a_slice_edge_crust_mantle, &
-                            IREGION_CRUST_MANTLE,.false.,dummy_i,INCLUDE_CENTRAL_CUBE)
-
-  deallocate(test_flag)
-  deallocate(dummy_i)
-
-  ! stores MPI interfaces informations
-  allocate(my_neighbours_crust_mantle(num_interfaces_crust_mantle), &
-          nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
-          stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_crust_mantle etc.')
-  my_neighbours_crust_mantle = -1
-  nibool_interfaces_crust_mantle = 0
-
-  ! copies interfaces arrays
-  if( num_interfaces_crust_mantle > 0 ) then
-    allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
-           stat=ier)
-    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_crust_mantle')
-    ibool_interfaces_crust_mantle = 0
-
-    ! ranks of neighbour processes
-    my_neighbours_crust_mantle(:) = my_neighbours(1:num_interfaces_crust_mantle)
-    ! number of global ibool entries on each interface
-    nibool_interfaces_crust_mantle(:) = nibool_neighbours(1:num_interfaces_crust_mantle)
-    ! global iglob point ids on each interface
-    ibool_interfaces_crust_mantle(:,:) = ibool_neighbours(1:max_nibool_interfaces_crust_mantle,1:num_interfaces_crust_mantle)
-  else
-    ! dummy allocation (fortran90 should allow allocate statement with zero array size)
-    max_nibool_interfaces_crust_mantle = 0
-    allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)
-  endif
-
-  ! debug: saves 1. MPI interface
-  !if( num_interfaces_crust_mantle >= 1 ) then
-  !  write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_1_points_crust_mantle_proc',myrank
-  !  call write_VTK_data_points(NGLOB_CRUST_MANTLE, &
-  !                      xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
-  !                      ibool_interfaces_crust_mantle(1:nibool_interfaces_crust_mantle(1),1), &
-  !                      nibool_interfaces_crust_mantle(1),filename)
-  !endif
-
-  ! checks addressing
-  call rmd_test_MPI_neighbours(num_interfaces_crust_mantle,my_neighbours_crust_mantle,nibool_interfaces_crust_mantle)
-
-  ! allocates MPI buffers
-  ! crust mantle
   allocate(buffer_send_vector_crust_mantle(NDIM,max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
           buffer_recv_vector_crust_mantle(NDIM,max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
           request_send_vector_crust_mantle(num_interfaces_crust_mantle), &
@@ -926,101 +588,9 @@
     if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_crust_mantle etc.')
   endif
 
-  ! checks with assembly of test fields
-  call rmd_test_MPI_cm()
-
-
-! outer core region
-  if( myrank == 0 ) write(IMAIN,*) 'outer core mpi:'
-
-  allocate(test_flag(NGLOB_OUTER_CORE), &
-          stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag outer core')
-
-  ! sets flag to rank id (+1 to avoid problems with zero rank)
-  test_flag(:) = myrank + 1.0
-
-  ! assembles values
-  call assemble_MPI_scalar_block(myrank,test_flag, &
-            NGLOB_OUTER_CORE, &
-            iproc_xi,iproc_eta,ichunk,addressing, &
-            iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
-            npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
-            iboolfaces_outer_core,iboolcorner_outer_core, &
-            iprocfrom_faces,iprocto_faces,imsg_type, &
-            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
-            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
-            NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_OUTER_CORE), &
-            NGLOB2DMAX_XMIN_XMAX(IREGION_OUTER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_OUTER_CORE),NGLOB2DMAX_XY,NCHUNKS_VAL)
-
-
-  ! removes own myrank id (+1)
-  test_flag(:) = test_flag(:) - ( myrank + 1.0)
-
-  ! debug: saves array
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_outer_core_proc',myrank
-  !call write_VTK_glob_points(NGLOB_OUTER_CORE, &
-  !                      xstore_outer_core,ystore_outer_core,zstore_outer_core, &
-  !                      test_flag,filename)
-
-  allocate(dummy_i(NSPEC_OUTER_CORE),stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating dummy_i')
-
-  ! determines neighbor rank for shared faces
-  call rmd_get_MPI_interfaces(myrank,NGLOB_OUTER_CORE,NSPEC_OUTER_CORE, &
-                            test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
-                            num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
-                            max_nibool,MAX_NEIGHBOURS, &
-                            ibool_outer_core,&
-                            is_on_a_slice_edge_outer_core, &
-                            IREGION_OUTER_CORE,.false.,dummy_i,INCLUDE_CENTRAL_CUBE)
-
-  deallocate(test_flag)
-  deallocate(dummy_i)
-
-  ! stores MPI interfaces informations
-  allocate(my_neighbours_outer_core(num_interfaces_outer_core), &
-          nibool_interfaces_outer_core(num_interfaces_outer_core), &
-          stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_outer_core etc.')
-  my_neighbours_outer_core = -1
-  nibool_interfaces_outer_core = 0
-
-  ! copies interfaces arrays
-  if( num_interfaces_outer_core > 0 ) then
-    allocate(ibool_interfaces_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
-           stat=ier)
-    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_outer_core')
-    ibool_interfaces_outer_core = 0
-
-    ! ranks of neighbour processes
-    my_neighbours_outer_core(:) = my_neighbours(1:num_interfaces_outer_core)
-    ! number of global ibool entries on each interface
-    nibool_interfaces_outer_core(:) = nibool_neighbours(1:num_interfaces_outer_core)
-    ! global iglob point ids on each interface
-    ibool_interfaces_outer_core(:,:) = ibool_neighbours(1:max_nibool_interfaces_outer_core,1:num_interfaces_outer_core)
-  else
-    ! dummy allocation (fortran90 should allow allocate statement with zero array size)
-    max_nibool_interfaces_outer_core = 0
-    allocate(ibool_interfaces_outer_core(0,0),stat=ier)
-  endif
-
-  ! debug: saves 1. MPI interface
-  !if( num_interfaces_outer_core >= 1 ) then
-  !  write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_1_points_outer_core_proc',myrank
-  !  call write_VTK_data_points(NGLOB_OUTER_CORE, &
-  !                      xstore_outer_core,ystore_outer_core,zstore_outer_core, &
-  !                      ibool_interfaces_outer_core(1:nibool_interfaces_outer_core(1),1), &
-  !                      nibool_interfaces_outer_core(1),filename)
-  !endif
-
-  ! checks addressing
-  call rmd_test_MPI_neighbours(num_interfaces_outer_core,my_neighbours_outer_core,nibool_interfaces_outer_core)
-
-  ! allocates MPI buffers
   ! outer core
+  call read_mesh_databases_MPI_OC()
+  
   allocate(buffer_send_scalar_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
           buffer_recv_scalar_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
           request_send_scalar_outer_core(num_interfaces_outer_core), &
@@ -1036,157 +606,9 @@
     if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_outer_core etc.')
   endif
 
-  ! checks with assembly of test fields
-  call rmd_test_MPI_oc()
-
-
-! inner core
-  if( myrank == 0 ) write(IMAIN,*) 'inner core mpi:'
-
-  allocate(test_flag(NGLOB_INNER_CORE), &
-          stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_flag inner core')
-
-  ! sets flag to rank id (+1 to avoid problems with zero rank)
-  test_flag(:) = 0.0
-  do ispec=1,NSPEC_INNER_CORE
-    ! suppress fictitious elements in central cube
-    if(idoubling_inner_core(ispec) == IFLAG_IN_FICTITIOUS_CUBE) cycle
-
-    ! sets flags
-    do k = 1,NGLLZ
-      do j = 1,NGLLY
-        do i = 1,NGLLX
-          iglob = ibool_inner_core(i,j,k,ispec)
-          test_flag(iglob) = myrank + 1.0
-        enddo
-      enddo
-    enddo
-  enddo
-
-  ! assembles values
-  call assemble_MPI_scalar_block(myrank,test_flag, &
-            NGLOB_INNER_CORE, &
-            iproc_xi,iproc_eta,ichunk,addressing, &
-            iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
-            npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
-            iboolfaces_inner_core,iboolcorner_inner_core, &
-            iprocfrom_faces,iprocto_faces,imsg_type, &
-            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
-            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
-            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
-            NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL(IREGION_INNER_CORE), &
-            NGLOB2DMAX_XMIN_XMAX(IREGION_INNER_CORE),NGLOB2DMAX_YMIN_YMAX(IREGION_INNER_CORE),NGLOB2DMAX_XY,NCHUNKS_VAL)
-
-  ! debug: saves array
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_inner_core_A_proc',myrank
-  !call write_VTK_glob_points(NGLOB_INNER_CORE, &
-  !                      xstore_inner_core,ystore_inner_core,zstore_inner_core, &
-  !                      test_flag,filename)
-  ! debug: idoubling inner core
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_idoubling_inner_core_proc',myrank
-  !call write_VTK_data_elem_i(NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-  !                          xstore_inner_core,ystore_inner_core,zstore_inner_core, &
-  !                          ibool_inner_core, &
-  !                          idoubling_inner_core,filename)
-  !call sync_all()
-
-  ! including central cube
-  if(INCLUDE_CENTRAL_CUBE) then
-
-    if( myrank == 0 ) write(IMAIN,*) 'inner core with central cube mpi:'
-
-    ! test_flag is a scalar, not a vector
-    ndim_assemble = 1
-
-    ! use central cube buffers to assemble the inner core mass matrix with the central cube
-    call assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
-                 npoin2D_cube_from_slices, buffer_all_cube_from_slices, &
-                 buffer_slices, buffer_slices2, ibool_central_cube, &
-                 receiver_cube_from_slices, ibool_inner_core, &
-                 idoubling_inner_core, NSPEC_INNER_CORE, &
-                 ibelm_bottom_inner_core, NSPEC2D_BOTTOM(IREGION_INNER_CORE), &
-                 NGLOB_INNER_CORE, &
-                 test_flag,ndim_assemble, &
-                 iproc_eta,addressing,NCHUNKS_VAL,NPROC_XI_VAL,NPROC_ETA_VAL)
-  endif
-
-  ! removes own myrank id (+1)
-  test_flag = test_flag - ( myrank + 1.0)
-  where( test_flag < 0.0 ) test_flag = 0.0
-
-  ! debug: saves array
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_test_flag_inner_core_B_proc',myrank
-  !call write_VTK_glob_points(NGLOB_INNER_CORE, &
-  !                    xstore_inner_core,ystore_inner_core,zstore_inner_core, &
-  !                    test_flag,filename)
-  !call sync_all()
-
-  ! in sequential order, for testing purpose
-  do i=0,NPROCTOT_VAL - 1
-    if( myrank == i ) then
-      ! gets new interfaces for inner_core without central cube yet
-      ! determines neighbor rank for shared faces
-      call rmd_get_MPI_interfaces(myrank,NGLOB_INNER_CORE,NSPEC_INNER_CORE, &
-                            test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
-                            num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
-                            max_nibool,MAX_NEIGHBOURS, &
-                            ibool_inner_core,&
-                            is_on_a_slice_edge_inner_core, &
-                            IREGION_INNER_CORE,.false.,idoubling_inner_core,INCLUDE_CENTRAL_CUBE)
-
-    endif
-    call sync_all()
-  enddo
-
-
-  deallocate(test_flag)
-  call sync_all()
-
-  ! stores MPI interfaces informations
-  allocate(my_neighbours_inner_core(num_interfaces_inner_core), &
-          nibool_interfaces_inner_core(num_interfaces_inner_core), &
-          stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array my_neighbours_inner_core etc.')
-  my_neighbours_inner_core = -1
-  nibool_interfaces_inner_core = 0
-
-  ! copies interfaces arrays
-  if( num_interfaces_inner_core > 0 ) then
-    allocate(ibool_interfaces_inner_core(max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
-           stat=ier)
-    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_inner_core')
-    ibool_interfaces_inner_core = 0
-
-    ! ranks of neighbour processes
-    my_neighbours_inner_core(:) = my_neighbours(1:num_interfaces_inner_core)
-    ! number of global ibool entries on each interface
-    nibool_interfaces_inner_core(:) = nibool_neighbours(1:num_interfaces_inner_core)
-    ! global iglob point ids on each interface
-    ibool_interfaces_inner_core(:,:) = ibool_neighbours(1:max_nibool_interfaces_inner_core,1:num_interfaces_inner_core)
-  else
-    ! dummy allocation (fortran90 should allow allocate statement with zero array size)
-    max_nibool_interfaces_inner_core = 0
-    allocate(ibool_interfaces_inner_core(0,0),stat=ier)
-  endif
-
-  ! debug: saves MPI interfaces
-  !do i=1,num_interfaces_inner_core
-  !  write(filename,'(a,i6.6,a,i2.2)') trim(OUTPUT_FILES)//'/MPI_points_inner_core_proc',myrank, &
-  !                  '_',my_neighbours_inner_core(i)
-  !  call write_VTK_data_points(NGLOB_INNER_CORE, &
-  !                    xstore_inner_core,ystore_inner_core,zstore_inner_core, &
-  !                    ibool_interfaces_inner_core(1:nibool_interfaces_inner_core(i),i), &
-  !                    nibool_interfaces_inner_core(i),filename)
-  !enddo
-  !call sync_all()
-
-  ! checks addressing
-  call rmd_test_MPI_neighbours(num_interfaces_inner_core,my_neighbours_inner_core,nibool_interfaces_inner_core)
-
-  ! allocates MPI buffers
   ! inner core
+  call read_mesh_databases_MPI_IC()
+
   allocate(buffer_send_vector_inner_core(NDIM,max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
           buffer_recv_vector_inner_core(NDIM,max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
           request_send_vector_inner_core(num_interfaces_inner_core), &
@@ -1202,1182 +624,290 @@
     if( ier /= 0 ) call exit_mpi(myrank,'error allocating array b_buffer_send_vector_inner_core etc.')
   endif
 
-  ! checks with assembly of test fields
-  call rmd_test_MPI_ic()
 
-  ! synchronizes MPI processes
-  call sync_all()
+  ! user output
+  if(myrank == 0) then
+    write(IMAIN,*) 'for overlapping of communications with calculations:'
+    write(IMAIN,*)
 
-  ! frees temporary array
-  deallocate(ibool_neighbours)
+    percentage_edge = 100. * nspec_outer_crust_mantle / real(NSPEC_CRUST_MANTLE)
+    write(IMAIN,*) 'percentage of edge elements in crust/mantle ',percentage_edge,'%'
+    write(IMAIN,*) 'percentage of volume elements in crust/mantle ',100. - percentage_edge,'%'
+    write(IMAIN,*)
 
-  end subroutine read_mesh_databases_MPIinter
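
The core idea of the removed routine is to discover neighbour ranks by assembling a test field: each process writes myrank+1 on its own points, the assembly sums contributions across processes, and removing the own value afterwards leaves neighbour_rank+1 on shared points and zero inside the slice. A two-rank toy version of that arithmetic, using MPI_ALLREDUCE as a stand-in for the real face/corner assembly:

  program neighbour_by_assembly
    implicit none
    include 'mpif.h'
    integer :: myrank, ier, neighbour
    real :: test_flag, assembled
    call MPI_INIT(ier)
    call MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ier)
    ! tag the (single, shared) global point with the own rank id, +1 so rank 0 shows up
    test_flag = myrank + 1.0
    ! stand-in for assemble_MPI_scalar_block(): sum contributions of all ranks touching the point
    call MPI_ALLREDUCE(test_flag, assembled, 1, MPI_REAL, MPI_SUM, MPI_COMM_WORLD, ier)
    ! remove the own contribution; what remains encodes the neighbour rank (+1)
    neighbour = nint(assembled - (myrank + 1.0)) - 1
    print *, 'rank', myrank, 'shares this point with rank', neighbour   ! run with 2 MPI processes
    call MPI_FINALIZE(ier)
  end program neighbour_by_assembly
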
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine rmd_get_MPI_interfaces(myrank,NGLOB,NSPEC, &
-                                    test_flag,my_neighbours,nibool_neighbours,ibool_neighbours, &
-                                    num_interfaces,max_nibool_interfaces, &
-                                    max_nibool,MAX_NEIGHBOURS, &
-                                    ibool,&
-                                    is_on_a_slice_edge, &
-                                    IREGION,add_central_cube,idoubling,INCLUDE_CENTRAL_CUBE)
-
-  use constants
-
-  implicit none
-
-  include 'mpif.h'
-
-  integer,intent(in) :: myrank,NGLOB,NSPEC
-
-  real(kind=CUSTOM_REAL),dimension(NGLOB),intent(in) :: test_flag
-
-  integer,intent(in) :: max_nibool
-  integer,intent(in) :: MAX_NEIGHBOURS
-  integer, dimension(MAX_NEIGHBOURS),intent(inout) :: my_neighbours,nibool_neighbours
-  integer, dimension(max_nibool,MAX_NEIGHBOURS),intent(inout) :: ibool_neighbours
-
-  integer,intent(out) :: num_interfaces,max_nibool_interfaces
-
-  integer,dimension(NGLLX,NGLLY,NGLLZ,NSPEC),intent(in) :: ibool
-
-  logical,dimension(NSPEC),intent(inout) :: is_on_a_slice_edge
-
-  integer,intent(in) :: IREGION
-  logical,intent(in) :: add_central_cube
-  integer,dimension(NSPEC),intent(in) :: idoubling
-
-  logical,intent(in) :: INCLUDE_CENTRAL_CUBE
-
-  ! local parameters
-  integer :: ispec,iglob,j,k
-  integer :: iface,iedge,icorner
-  integer :: ii,iinterface,icurrent,rank
-  integer :: npoin
-  logical :: is_done,ispec_is_outer
-  integer,dimension(NGLOB) :: work_test_flag
-  logical,dimension(NSPEC) :: work_ispec_is_outer
-
-  integer,parameter :: MID = (NGLLX+1)/2
-
-  ! initializes
-  if( add_central_cube) then
-    ! adds points to existing inner_core interfaces
-    iinterface = num_interfaces
-    work_ispec_is_outer(:) = is_on_a_slice_edge(:)
-  else
-    ! creates new interfaces
-    iinterface = 0
-    num_interfaces = 0
-    max_nibool_interfaces = 0
-    my_neighbours(:) = -1
-    nibool_neighbours(:) = 0
-    ibool_neighbours(:,:) = 0
-    work_ispec_is_outer(:) = .false.
-  endif
-
-  ! makes working copy (converted to nearest integers)
-  work_test_flag(:) = nint( test_flag(:) )
-
-  ! loops over all elements
-  do ispec = 1,NSPEC
-
-    ! exclude elements in inner part of slice
-    !if( .not. is_on_a_slice_edge(ispec) ) cycle
-
-    ! exclude elements in fictitious core
-    if( IREGION == IREGION_INNER_CORE) then
-      if( idoubling(ispec) == IFLAG_IN_FICTITIOUS_CUBE ) cycle
-    endif
-
-    ! sets flag if element has global points shared with other processes
-    ispec_is_outer = .false.
-
-    ! 1. finds neighbours which share a whole face with this process
-    ! (faces are shared only with 1 other neighbour process)
-
-    ! loops over all faces of element
-    do iface = 1, 6
-
-      ! chooses a point inside face
-      select case( iface )
-      case( 1 )
-        ! face I == 1
-        iglob = ibool(1,MID,MID,ispec)
-      case( 2 )
-        ! face I == NGLLX
-        iglob = ibool(NGLLX,MID,MID,ispec)
-      case( 3 )
-        ! face J == 1
-        iglob = ibool(MID,1,MID,ispec)
-      case( 4 )
-        ! face J == NGLLY
-        iglob = ibool(MID,NGLLY,MID,ispec)
-      case( 5 )
-        ! face K == 1
-        iglob = ibool(MID,MID,1,ispec)
-      case( 6 )
-        ! face K == NGLLZ
-        iglob = ibool(MID,MID,NGLLZ,ispec)
-      end select
-
-      ! checks assembled flag on global point
-      if( work_test_flag(iglob) > 0 ) then
-        ispec_is_outer = .true.
-
-        ! rank of neighbor process
-        rank = work_test_flag(iglob) - 1
-
-        ! checks ranks range
-        if( rank < 0 .or. rank >= NPROCTOT_VAL ) then
-          print*,'error face rank: ',myrank,'ispec=',ispec
-          print*,'  neighbor rank = ',rank,'exceeds total nproc:',NPROCTOT_VAL
-          print*,'  face ',iface
-          call exit_mpi(myrank,'error face neighbor mpi rank')
-        endif
-
-        ! checks if already stored
-        icurrent = 0
-        is_done = .false.
-        do ii = 1,iinterface
-          if( rank == my_neighbours(ii) ) then
-            icurrent = ii
-            is_done = .true.
-            exit
-          endif
-        enddo
-
-        ! updates interfaces array
-        if( .not. is_done ) then
-          iinterface = iinterface + 1
-          if( iinterface > MAX_NEIGHBOURS ) &
-            call exit_mpi(myrank,'interface face exceeds MAX_NEIGHBOURS range')
-          ! adds as neighbor new interface
-          my_neighbours(iinterface) = rank
-          icurrent = iinterface
-        endif
-        if( icurrent == 0 ) &
-          call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
-
-        ! adds interface points and removes neighbor flag from face
-        ! assumes NGLLX == NGLLY == NGLLZ
-        do k=1,NGLLX
-          do j=1,NGLLX
-            select case( iface )
-            case( 1 )
-              ! face I == 1
-              iglob = ibool(1,j,k,ispec)
-            case( 2 )
-              ! face I == NGLLX
-              iglob = ibool(NGLLX,j,k,ispec)
-            case( 3 )
-              ! face J == 1
-              iglob = ibool(j,1,k,ispec)
-            case( 4 )
-              ! face J == NGLLY
-              iglob = ibool(j,NGLLY,k,ispec)
-            case( 5 )
-              ! face K == 1
-              iglob = ibool(j,k,1,ispec)
-            case( 6 )
-              ! face K == NGLLZ
-              iglob = ibool(j,k,NGLLZ,ispec)
-            end select
-
-            ! checks that we take each global point (on edges and corners) only once
-            call rmd_add_interface_point(iglob,rank,icurrent, &
-                                        nibool_neighbours,MAX_NEIGHBOURS, &
-                                        ibool_neighbours,max_nibool, &
-                                        work_test_flag,NGLOB,myrank, &
-                                        .true.,add_central_cube)
-            ! debug
-            if( work_test_flag(iglob) < 0 ) then
-              if( IREGION == IREGION_INNER_CORE .and. INCLUDE_CENTRAL_CUBE ) then
-                ! we might have missed an interface point on an edge, just re-set to missing value
-                print*,'warning face flag:',myrank,'ispec=',ispec,'rank=',rank
-                print*,'  flag=',work_test_flag(iglob),'iface jk=',iface,j,k,'missed iglob=',iglob
-                !work_test_flag(iglob) = 0
-              else
-                print*,'error face flag:',myrank,'ispec=',ispec,'rank=',rank
-                print*,'  flag=',work_test_flag(iglob),'iface jk=',iface,j,k,'iglob=',iglob
-                call exit_mpi(myrank,'error face flag')
-              endif
-            endif
-
-          enddo
-        enddo
-      endif
-    enddo ! iface
-
-    ! 2. finds neighbours which share a single edge with this process
-    ! note: by now, faces have subtracted their neighbours, edges can hold only one more process info
-
-    ! loops over all edges of element
-    do iedge = 1, 12
-
-      ! chooses a point inside edge but not corner
-      select case( iedge )
-      case( 1 )
-        ! face I == 1, J == 1
-        iglob = ibool(1,1,MID,ispec)
-      case( 2 )
-        ! face I == 1, J == NGLLY
-        iglob = ibool(1,NGLLY,MID,ispec)
-      case( 3 )
-        ! face I == 1, K == 1
-        iglob = ibool(1,MID,1,ispec)
-      case( 4 )
-        ! face I == 1, K == NGLLZ
-        iglob = ibool(1,MID,NGLLZ,ispec)
-      case( 5 )
-        ! face I == NGLLX, J == 1
-        iglob = ibool(NGLLX,1,MID,ispec)
-      case( 6 )
-        ! face I == NGLLX, J == NGLLY
-        iglob = ibool(NGLLX,NGLLY,MID,ispec)
-      case( 7 )
-        ! face I == NGLLX, K == 1
-        iglob = ibool(NGLLX,MID,1,ispec)
-      case( 8 )
-        ! face I == NGLLX, K == NGLLZ
-        iglob = ibool(NGLLX,MID,NGLLZ,ispec)
-      case( 9 )
-        ! face J == 1, K == 1
-        iglob = ibool(MID,1,1,ispec)
-      case( 10 )
-        ! face J == 1, K == NGLLZ
-        iglob = ibool(MID,1,NGLLZ,ispec)
-      case( 11 )
-        ! face J == NGLLY, K == 1
-        iglob = ibool(MID,NGLLY,1,ispec)
-      case( 12 )
-        ! face J == NGLLY, K == NGLLZ
-        iglob = ibool(MID,NGLLY,NGLLZ,ispec)
-      end select
-
-      ! checks assembled flag on global point
-      if( work_test_flag(iglob) > 0 ) then
-        ispec_is_outer = .true.
-
-        ! rank of neighbor process
-        rank = work_test_flag(iglob) - 1
-
-        ! checks ranks range
-        if( rank < 0 .or. rank >= NPROCTOT_VAL ) then
-          print*,'error egde rank: ',myrank
-          print*,'  neighbor rank = ',rank,'exceeds total nproc:',NPROCTOT_VAL
-          print*,'  edge ',iedge
-          call exit_mpi(myrank,'error edge neighbor mpi rank')
-        endif
-
-        ! checks if already stored
-        icurrent = 0
-        is_done = .false.
-        do ii = 1,iinterface
-          if( rank == my_neighbours(ii) ) then
-            icurrent = ii
-            is_done = .true.
-            exit
-          endif
-        enddo
-
-        ! updates interfaces array
-        if( .not. is_done ) then
-          iinterface = iinterface + 1
-          if( iinterface > MAX_NEIGHBOURS ) &
-            call exit_mpi(myrank,'interface edge exceeds MAX_NEIGHBOURS range')
-          ! adds as neighbor new interface
-          my_neighbours(iinterface) = rank
-          icurrent = iinterface
-        endif
-        if( icurrent == 0 ) &
-          call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
-
-        ! adds interface points and removes neighbor flag from edge
-        ! assumes NGLLX == NGLLY == NGLLZ
-        do k = 1,NGLLX
-          select case( iedge )
-          case( 1 )
-            ! face I == 1, J == 1
-            iglob = ibool(1,1,k,ispec)
-          case( 2 )
-            ! face I == 1, J == NGLLY
-            iglob = ibool(1,NGLLY,k,ispec)
-          case( 3 )
-            ! face I == 1, K == 1
-            iglob = ibool(1,k,1,ispec)
-          case( 4 )
-            ! face I == 1, K == NGLLZ
-            iglob = ibool(1,k,NGLLZ,ispec)
-          case( 5 )
-            ! face I == NGLLX, J == 1
-            iglob = ibool(NGLLX,1,k,ispec)
-          case( 6 )
-            ! face I == NGLLX, J == NGLLY
-            iglob = ibool(NGLLX,NGLLY,k,ispec)
-          case( 7 )
-            ! face I == NGLLX, K == 1
-            iglob = ibool(NGLLX,k,1,ispec)
-          case( 8 )
-            ! face I == NGLLX, K == NGLLZ
-            iglob = ibool(NGLLX,k,NGLLZ,ispec)
-          case( 9 )
-            ! face J == 1, K == 1
-            iglob = ibool(k,1,1,ispec)
-          case( 10 )
-            ! face J == 1, K == NGLLZ
-            iglob = ibool(k,1,NGLLZ,ispec)
-          case( 11 )
-            ! face J == NGLLY, K == 1
-            iglob = ibool(k,NGLLY,1,ispec)
-          case( 12 )
-            ! face J == NGLLY, K == NGLLZ
-            iglob = ibool(k,NGLLY,NGLLZ,ispec)
-          end select
-
-          ! checks that we take each global point (on edges and corners) only once
-          call rmd_add_interface_point(iglob,rank,icurrent, &
-                                        nibool_neighbours,MAX_NEIGHBOURS, &
-                                        ibool_neighbours,max_nibool, &
-                                        work_test_flag,NGLOB,myrank, &
-                                        .true.,add_central_cube)
-
-          ! debug
-          if( work_test_flag(iglob) < 0 ) then
-            if( IREGION == IREGION_INNER_CORE .and. INCLUDE_CENTRAL_CUBE ) then
-              ! we might have missed an interface point on an edge, just re-set to missing value
-              print*,'warning edge flag:',myrank,'ispec=',ispec,'rank=',rank
-              print*,'  flag=',work_test_flag(iglob),'iedge jk=',iedge,k,'missed iglob=',iglob
-              !work_test_flag(iglob) = 0
-            else
-              print*,'error edge flag:',myrank,'ispec=',ispec,'rank=',rank
-              print*,'  flag=',work_test_flag(iglob),'iedge jk=',iedge,k,'iglob=',iglob
-              call exit_mpi(myrank,'error edge flag')
-            endif
-          endif
-
-        enddo
-      endif
-    enddo ! iedge
-
-
-    ! 3. finds neighbours which share a single corner with this process
-    ! note: faces and edges have subtracted their neighbors, only one more process left possible
-
-    ! loops over all corners of element
-    do icorner = 1, 8
-
-      ! chooses a corner point
-      select case( icorner )
-      case( 1 )
-        ! face I == 1
-        iglob = ibool(1,1,1,ispec)
-      case( 2 )
-        ! face I == 1
-        iglob = ibool(1,NGLLY,1,ispec)
-      case( 3 )
-        ! face I == 1
-        iglob = ibool(1,1,NGLLZ,ispec)
-      case( 4 )
-        ! face I == 1
-        iglob = ibool(1,NGLLY,NGLLZ,ispec)
-      case( 5 )
-        ! face I == NGLLX
-        iglob = ibool(NGLLX,1,1,ispec)
-      case( 6 )
-        ! face I == NGLLX
-        iglob = ibool(NGLLX,NGLLY,1,ispec)
-      case( 7 )
-        ! face I == NGLLX
-        iglob = ibool(NGLLX,1,NGLLZ,ispec)
-      case( 8 )
-        ! face I == NGLLX
-        iglob = ibool(NGLLX,NGLLY,NGLLZ,ispec)
-      end select
-
-      ! makes sure that all elements on mpi interfaces are included
-      ! uses original test_flag array, since the working copy reduces values
-      ! note: there can be elements which have an edge or corner shared with
-      !          other mpi partitions, but have the work_test_flag value already set to zero
-      !          since the iglob point was found before.
-      !          also, this check here would suffice to determine the outer flag, but we also include the
-      !          check everywhere we encounter it too
-      if( test_flag(iglob) > 0.5 ) then
-        ispec_is_outer = .true.
-      endif
-
-      ! checks assembled flag on global point
-      if( work_test_flag(iglob) > 0 ) then
-        ispec_is_outer = .true.
-
-        ! rank of neighbor process
-        rank = work_test_flag(iglob) - 1
-
-        ! checks ranks range
-        if( rank < 0 .or. rank >= NPROCTOT_VAL ) then
-          print*,'error corner: ',myrank
-          print*,'  neighbor rank = ',rank,'exceeds total nproc:',NPROCTOT_VAL
-          print*,'  corner ',icorner
-          call exit_mpi(myrank,'error corner neighbor mpi rank')
-        endif
-
-        ! checks if already stored
-        icurrent = 0
-        is_done = .false.
-        do ii = 1,iinterface
-          if( rank == my_neighbours(ii) ) then
-            icurrent = ii
-            is_done = .true.
-            exit
-          endif
-        enddo
-
-        ! updates interfaces array
-        if( .not. is_done ) then
-          iinterface = iinterface + 1
-          if( iinterface > MAX_NEIGHBOURS ) &
-            call exit_mpi(myrank,'interface corner exceed MAX_NEIGHBOURS range')
-          ! adds as neighbor new interface
-          my_neighbours(iinterface) = rank
-          icurrent = iinterface
-        endif
-        if( icurrent == 0 ) &
-          call exit_mpi(myrank,'could not find current interface for this neighbor, please check my_neighbours')
-
-        ! adds this corner as interface point and removes neighbor flag from face,
-        ! checks that we take each global point (on edges and corners) only once
-        call rmd_add_interface_point(iglob,rank,icurrent, &
-                                    nibool_neighbours,MAX_NEIGHBOURS, &
-                                    ibool_neighbours,max_nibool, &
-                                    work_test_flag,NGLOB,myrank, &
-                                    .false.,add_central_cube)
-
-        ! debug
-        if( work_test_flag(iglob) < 0 ) call exit_mpi(myrank,'error corner flag')
-
-      endif
-
-    enddo ! icorner
-
-    ! stores flags for outer elements when recognized as such
-    ! (inner/outer elements separated for non-blocking mpi communications)
-    if( ispec_is_outer ) then
-      work_ispec_is_outer(ispec) = .true.
-    endif
-
-  enddo
-
-  ! number of outer elements (on MPI interfaces)
-  npoin = count( work_ispec_is_outer )
-
-  ! debug: user output
-  if( add_central_cube ) then
-    print*, 'rank',myrank,'interfaces : ',iinterface
-    do j=1,iinterface
-      print*, '  my_neighbours: ',my_neighbours(j),nibool_neighbours(j)
-    enddo
-    print*, '  test flag min/max: ',minval(work_test_flag),maxval(work_test_flag)
-    print*, '  outer elements: ',npoin
-    print*
-  endif
-
-  ! checks if all points were recognized
-  if( minval(work_test_flag) < 0 .or. maxval(work_test_flag) > 0 ) then
-    print*,'error mpi interface rank: ',myrank
-    print*,'  work_test_flag min/max :',minval(work_test_flag),maxval(work_test_flag)
-    call exit_mpi(myrank,'error: mpi points remain unrecognized, please check mesh interfaces')
-  endif
-
-  ! sets interfaces infos
-  num_interfaces = iinterface
-  max_nibool_interfaces = maxval( nibool_neighbours(1:num_interfaces) )
-
-  ! optional: ibool usually is already sorted,
-  !                this makes sure ibool_neighbours arrays are still sorted
-  !               (iglob indices in increasing order; we will access acceleration fields accel(:,iglob),
-  !                thus it helps if iglob strides are short and accesses are close-by)
-  do iinterface = 1,num_interfaces
-    npoin = nibool_neighbours(iinterface)
-    call heap_sort( npoin, ibool_neighbours(1:npoin,iinterface) )
-
-    ! debug: checks if unique set of iglob values
-    do j=1,npoin-1
-      if( ibool_neighbours(j,iinterface) == ibool_neighbours(j+1,iinterface) ) then
-        if( IREGION == IREGION_INNER_CORE .and. INCLUDE_CENTRAL_CUBE ) then
-          ! missing points might have been counted more than once
-          if( ibool_neighbours(j,iinterface) > 0 ) then
-            print*,'warning mpi interface rank:',myrank
-            print*,'  interface: ',my_neighbours(iinterface),'point: ',j,'of',npoin,'iglob=',ibool_neighbours(j,iinterface)
-            ! decrease number of points
-            nibool_neighbours(iinterface) = nibool_neighbours(iinterface) - 1
-            if( nibool_neighbours(iinterface) <= 0 ) then
-              print*,'error zero mpi interface rank:',myrank,'interface=',my_neighbours(iinterface)
-              call exit_mpi(myrank,'error: zero mpi points on interface')
-            endif
-            ! shift values
-            do k = j+1,npoin-1
-              ii = ibool_neighbours(k+1,iinterface)
-              ibool_neighbours(k,iinterface) = ii
-            enddo
-            ! re-sets values
-            ibool_neighbours(npoin,iinterface) = 0
-            npoin = nibool_neighbours(iinterface)
-            max_nibool_interfaces = maxval( nibool_neighbours(1:num_interfaces) )
-          endif
-        else
-          print*,'error mpi interface rank:',myrank
-          print*,'  interface: ',my_neighbours(iinterface),'point: ',j,'of',npoin,'iglob=',ibool_neighbours(j,iinterface)
-          call exit_mpi(myrank,'error: mpi points not unique on interface')
-        endif
-      endif
-    enddo
-  enddo
-
-  ! re-sets flags for outer elements
-  is_on_a_slice_edge(:) = work_ispec_is_outer(:)
-
-  end subroutine rmd_get_MPI_interfaces
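
The heap_sort call near the end of the removed routine keeps each interface list in ascending iglob order, so that packing a send buffer sweeps forward through the acceleration array with short strides. A reduced sketch of such a packing loop (array names and sizes are illustrative only):

  program pack_sorted_interface
    implicit none
    integer, parameter :: NDIM = 3, NGLOB = 1000, NPOIN = 4
    ! interface point list, kept in ascending order by the solver's heap_sort()
    integer :: ibool_interface(NPOIN) = (/ 12, 57, 58, 913 /)
    real :: accel(NDIM,NGLOB), buffer_send(NDIM,NPOIN)
    integer :: ipoin, iglob
    accel = 1.0
    ! forward, nearly contiguous sweep through accel(:,iglob) thanks to the sorted list
    do ipoin = 1, NPOIN
      iglob = ibool_interface(ipoin)
      buffer_send(:,ipoin) = accel(:,iglob)
    enddo
    print *, 'packed', NPOIN, 'points, first value', buffer_send(1,1)
  end program pack_sorted_interface
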
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine rmd_add_interface_point(iglob,rank,icurrent, &
-                                    nibool_neighbours,MAX_NEIGHBOURS, &
-                                    ibool_neighbours,max_nibool, &
-                                    work_test_flag,NGLOB,myrank, &
-                                    is_face_edge,add_central_cube)
-
-  use constants
-
-  implicit none
-
-  integer,intent(in) :: iglob,rank,icurrent
-  integer,intent(in) :: myrank
-
-  integer,intent(in) :: MAX_NEIGHBOURS,max_nibool
-  integer, dimension(MAX_NEIGHBOURS),intent(inout) :: nibool_neighbours
-  integer, dimension(max_nibool,MAX_NEIGHBOURS),intent(inout) :: ibool_neighbours
-
-  integer,intent(in) :: NGLOB
-  integer,dimension(NGLOB) :: work_test_flag
-
-  logical,intent(in) :: is_face_edge,add_central_cube
-
-  ! local parameters
-  integer :: i
-  logical :: is_done
-
-  ! let's check and be sure for central cube
-  !if( work_test_flag(iglob) <= 0 ) cycle ! continues to next point
-
-  ! checks that we take each global point (on edges and corners) only once
-  is_done = .false.
-  do i=1,nibool_neighbours(icurrent)
-    if( ibool_neighbours(i,icurrent) == iglob ) then
-      is_done = .true.
-      exit
-    endif
-  enddo
-
-  ! checks if anything to do
-  if( is_done ) then
-    ! special handling for central cube: removes rank if already added in inner core
-    if( add_central_cube ) then
-      if( is_face_edge .and. work_test_flag(iglob) < (rank + 1) ) then
-        ! re-sets if we missed this rank number
-        work_test_flag(iglob) = work_test_flag(iglob) + (rank + 1)
-      endif
-      ! re-sets flag
-      work_test_flag(iglob) = work_test_flag(iglob) - ( rank + 1 )
-      if( is_face_edge .and. work_test_flag(iglob) < 0 ) then
-        ! re-sets to zero if we missed this rank number
-        if( work_test_flag(iglob) == - (rank + 1 ) ) work_test_flag(iglob) = 0
-      endif
-    endif
-    return
-  endif
-
-  ! checks if flag was set correctly
-  if( work_test_flag(iglob) <= 0 ) then
-    ! we might have missed an interface point on an edge, just re-set to missing value
-    print*,'warning ',myrank,' flag: missed rank=',rank
-    print*,'  flag=',work_test_flag(iglob),'missed iglob=',iglob,'interface=',icurrent
-    print*
-  endif
-  ! we might have missed an interface point on an edge, just re-set to missing value
-  if( is_face_edge ) then
-    if( work_test_flag(iglob) < (rank + 1) ) then
-      ! re-sets if we missed this rank number
-      work_test_flag(iglob) = work_test_flag(iglob) + (rank + 1)
-    endif
-  endif
-
-  ! adds point
-  ! increases number of total points on this interface
-  nibool_neighbours(icurrent) = nibool_neighbours(icurrent) + 1
-  if( nibool_neighbours(icurrent) > max_nibool) &
-      call exit_mpi(myrank,'interface face exceeds max_nibool range')
-
-  ! stores interface iglob index
-  ibool_neighbours( nibool_neighbours(icurrent),icurrent ) = iglob
-
-  ! re-sets flag
-  work_test_flag(iglob) = work_test_flag(iglob) - ( rank + 1 )
-
-  ! checks
-  if( is_face_edge .and. work_test_flag(iglob) < 0 ) then
-    ! re-sets to zero if we missed this rank number
-    if( work_test_flag(iglob) == - (rank + 1 ) ) work_test_flag(iglob) = 0
-  endif
-
-  end subroutine rmd_add_interface_point
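For intuition about the flag arithmetic above: the routine assumes each shared global point carries the sum of (rank + 1) over the neighbouring slices that touch it, and adding the point to an interface subtracts that neighbour's contribution until the flag reaches zero. A minimal sketch of this bookkeeping, with purely hypothetical ranks (1 and 3) and no connection to a real mesh:

  program flag_bookkeeping_example
  implicit none
  integer :: work_test_flag
  integer, parameter :: rank_a = 1, rank_b = 3    ! hypothetical neighbouring ranks

  ! flag as assembled for a point shared with ranks 1 and 3
  work_test_flag = (rank_a + 1) + (rank_b + 1)    ! = 6
  ! point gets added to the interface with rank 1
  work_test_flag = work_test_flag - (rank_a + 1)  ! = 4
  ! point gets added to the interface with rank 3
  work_test_flag = work_test_flag - (rank_b + 1)  ! = 0, all contributions consumed
  print *, 'remaining flag = ', work_test_flag

  end program flag_bookkeeping_example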
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-  subroutine rmd_test_MPI_neighbours(num_interfaces,my_neighbours,nibool_interfaces)
-
-  use specfem_par
-  implicit none
-
-  include 'mpif.h'
-
-  integer,intent(in) :: num_interfaces
-  integer,dimension(num_interfaces),intent(in) :: my_neighbours,nibool_interfaces
-
-  ! local parameters
-  integer,dimension(:),allocatable :: dummy_i
-  integer,dimension(:,:),allocatable :: test_interfaces
-  integer,dimension(:,:),allocatable :: test_interfaces_nibool
-  integer :: msg_status(MPI_STATUS_SIZE)
-  integer :: ineighbour,iproc,inum,i,j,ier,ipoints,max_num
-  logical :: is_okay
-
-  ! checks neighbors
-  ! gets maximum interfaces from all processes
-  call MPI_REDUCE(num_interfaces,max_num,1,MPI_INTEGER,MPI_MAX,0,MPI_COMM_WORLD,ier)
-
-  ! master gathers infos
-  if( myrank == 0 ) then
-    ! array for gathering infos
-    allocate(test_interfaces(max_num,0:NPROCTOT_VAL),stat=ier)
-    if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_interfaces')
-    test_interfaces = -1
-
-    allocate(test_interfaces_nibool(max_num,0:NPROCTOT_VAL),stat=ier)
-    if( ier /= 0 ) call exit_mpi(myrank,'error allocating test_interfaces_nibool')
-    test_interfaces_nibool = 0
-
-    ! used to store number of interfaces per proc
-    allocate(dummy_i(0:NPROCTOT_VAL),stat=ier)
-    if( ier /= 0 ) call exit_mpi(myrank,'error allocating dummy_i for test interfaces')
-    dummy_i = 0
-
-    ! sets infos for master process
-    test_interfaces(1:num_interfaces,0) = my_neighbours(1:num_interfaces)
-    test_interfaces_nibool(1:num_interfaces,0) = nibool_interfaces(1:num_interfaces)
-    dummy_i(0) = num_interfaces
-
-    ! collects from other processes
-    do iproc=1,NPROCTOT_VAL-1
-      ! gets number of interfaces
-      call MPI_RECV(inum,1,MPI_INTEGER,iproc,itag,MPI_COMM_WORLD,msg_status,ier)
-      dummy_i(iproc) = inum
-      if( inum > 0 ) then
-        call MPI_RECV(test_interfaces(1:inum,iproc),inum, &
-                      MPI_INTEGER,iproc,itag,MPI_COMM_WORLD,msg_status,ier)
-        call MPI_RECV(test_interfaces_nibool(1:inum,iproc),inum, &
-                      MPI_INTEGER,iproc,itag,MPI_COMM_WORLD,msg_status,ier)
-      endif
-    enddo
-  else
-    ! sends infos to master process
-    call MPI_SEND(num_interfaces,1,MPI_INTEGER,0,itag,MPI_COMM_WORLD,ier)
-    if( num_interfaces > 0 ) then
-      call MPI_SEND(my_neighbours(1:num_interfaces),num_interfaces, &
-                    MPI_INTEGER,0,itag,MPI_COMM_WORLD,ier)
-      call MPI_SEND(nibool_interfaces(1:num_interfaces),num_interfaces, &
-                    MPI_INTEGER,0,itag,MPI_COMM_WORLD,ier)
-    endif
-  endif
-  call sync_all()
-
-  ! checks if addressing is okay
-  if( myrank == 0 ) then
-    ! for each process
-    do iproc=0,NPROCTOT_VAL-1
-      ! loops over all neighbors
-      do i=1,dummy_i(iproc)
-        ! gets neighbour rank and number of points on interface with it
-        ineighbour = test_interfaces(i,iproc)
-        ipoints = test_interfaces_nibool(i,iproc)
-
-        ! checks values
-        if( ineighbour < 0 .or. ineighbour > NPROCTOT_VAL-1 ) then
-          print*,'error neighbour:',iproc,ineighbour
-          call exit_mpi(myrank,'error ineighbour')
-        endif
-        if( ipoints <= 0 ) then
-          print*,'error neighbour points:',iproc,ipoints
-          call exit_mpi(myrank,'error ineighbour points')
-        endif
-
-        ! looks up corresponding entry in neighbour array
-        is_okay = .false.
-        do j=1,dummy_i(ineighbour)
-          if( test_interfaces(j,ineighbour) == iproc ) then
-            ! checks if same number of interface points with this neighbour
-            if( test_interfaces_nibool(j,ineighbour) == ipoints ) then
-              is_okay = .true.
-            else
-              print*,'error ',iproc,'neighbour ',ineighbour,' points =',ipoints
-              print*,'  ineighbour has points = ',test_interfaces_nibool(j,ineighbour)
-              print*
-              call exit_mpi(myrank,'error ineighbour points differ')
-            endif
-            exit
-          endif
-        enddo
-        if( .not. is_okay ) then
-          print*,'error ',iproc,' neighbour not found: ',ineighbour
-          print*,'iproc ',iproc,' interfaces:'
-          print*,test_interfaces(1:dummy_i(iproc),iproc)
-          print*,'ineighbour ',ineighbour,' interfaces:'
-          print*,test_interfaces(1:dummy_i(ineighbour),ineighbour)
-          print*
-          call exit_mpi(myrank,'error ineighbour not found')
-        endif
-      enddo
-    enddo
-
-    ! user output
-    write(IMAIN,*) '  mpi addressing maximum interfaces:',maxval(dummy_i)
-    write(IMAIN,*) '  mpi addressing : all interfaces okay'
+    percentage_edge = 100.* nspec_outer_outer_core / real(NSPEC_OUTER_CORE)
+    write(IMAIN,*) 'percentage of edge elements in outer core ',percentage_edge,'%'
+    write(IMAIN,*) 'percentage of volume elements in outer core ',100. - percentage_edge,'%'
     write(IMAIN,*)
 
-    deallocate(dummy_i)
-    deallocate(test_interfaces)
+    percentage_edge = 100. * nspec_outer_inner_core / real(NSPEC_INNER_CORE)
+    write(IMAIN,*) 'percentage of edge elements in inner core ',percentage_edge,'%'
+    write(IMAIN,*) 'percentage of volume elements in inner core ',100. - percentage_edge,'%'
+    write(IMAIN,*)
   endif
+  ! synchronizes MPI processes
   call sync_all()
-
-  end subroutine rmd_test_MPI_neighbours
-
+  
+  end subroutine read_mesh_databases_MPI
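The percentage output above is a plain ratio of edge (outer) elements to the total element count of each region. A standalone sketch of the same arithmetic, using made-up counts rather than values from any mesh:

  program edge_ratio_example
  implicit none
  integer, parameter :: nspec_outer = 900     ! hypothetical edge-element count
  integer, parameter :: nspec_total = 4000    ! hypothetical total element count
  real :: percentage_edge

  percentage_edge = 100. * nspec_outer / real(nspec_total)
  print *, 'percentage of edge elements   ', percentage_edge, '%'         ! 22.5 %
  print *, 'percentage of volume elements ', 100. - percentage_edge, '%'  ! 77.5 %

  end program edge_ratio_example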
+  
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine rmd_test_MPI_cm()
-
+  subroutine read_mesh_databases_MPI_CM()
+  
   use specfem_par
   use specfem_par_crustmantle
-  use specfem_par_outercore
-  use specfem_par_innercore
   implicit none
 
-  include 'mpif.h'
-
   ! local parameters
-  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: test_flag_vector
-  integer :: i,j,iglob,ier
-  integer :: inum,icount
+  integer :: ier
 
-  ! crust mantle
-  allocate(test_flag_vector(NDIM,NGLOB_CRUST_MANTLE),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array test_flag etc.'
+  ! crust mantle region
+  
+  ! create the name for the database of the current slice and region
+  call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+  
+  open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
+       status='old',action='read',form='unformatted',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_mpi.bin')
 
-  ! points defined by interfaces
-  test_flag_vector = 0.0
-  do i=1,num_interfaces_crust_mantle
-    do j=1,nibool_interfaces_crust_mantle(i)
-      iglob = ibool_interfaces_crust_mantle(j,i)
-      test_flag_vector(1,iglob) = 1.0_CUSTOM_REAL
-    enddo
-  enddo
-  i = sum(nibool_interfaces_crust_mantle)
-  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
-  i = nint( sum(test_flag_vector) )
-  call MPI_REDUCE(i,icount,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
-  if( myrank == 0 ) then
-    write(IMAIN,*) '  total MPI interface points : ',inum
-    write(IMAIN,*) '  unique MPI interface points: ',icount
+  ! MPI interfaces
+  read(IIN) num_interfaces_crust_mantle
+  allocate(my_neighbours_crust_mantle(num_interfaces_crust_mantle), &
+          nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
+          stat=ier)
+  if( ier /= 0 ) &
+    call exit_mpi(myrank,'error allocating array my_neighbours_crust_mantle etc.')
+  
+  if( num_interfaces_crust_mantle > 0 ) then
+    read(IIN) max_nibool_interfaces_crust_mantle
+    allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_crust_mantle,num_interfaces_crust_mantle), &
+            stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_crust_mantle')
+    
+    read(IIN) my_neighbours_crust_mantle
+    read(IIN) nibool_interfaces_crust_mantle
+    read(IIN) ibool_interfaces_crust_mantle
+  else
+    ! dummy array
+    max_nibool_interfaces_crust_mantle = 0
+    allocate(ibool_interfaces_crust_mantle(0,0),stat=ier)  
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array dummy ibool_interfaces_crust_mantle')    
   endif
 
-  ! initializes for assembly
-  test_flag_vector = myrank + 1.0_CUSTOM_REAL
-  call sync_all()
+  ! inner / outer elements
+  read(IIN) nspec_inner_crust_mantle,nspec_outer_crust_mantle
+  read(IIN) num_phase_ispec_crust_mantle
+  if( num_phase_ispec_crust_mantle < 0 ) &
+    call exit_mpi(myrank,'error num_phase_ispec_crust_mantle is < zero')
 
-  ! adds contributions from different partitions to flag arrays
-  ! custom_real arrays
-  ! send
-  call assemble_MPI_vector_ext_mesh_s(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
-                      test_flag_vector, &
-                      buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
-                      num_interfaces_crust_mantle,max_nibool_interfaces_crust_mantle, &
-                      nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,&
-                      my_neighbours_crust_mantle, &
-                      request_send_vector_crust_mantle,request_recv_vector_crust_mantle)
-  ! receive
-  call assemble_MPI_vector_ext_mesh_w(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
-                              test_flag_vector, &
-                              buffer_recv_vector_crust_mantle,num_interfaces_crust_mantle,&
-                              max_nibool_interfaces_crust_mantle, &
-                              nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
-                              request_send_vector_crust_mantle,request_recv_vector_crust_mantle)
-  call sync_all()
+  allocate(phase_ispec_inner_crust_mantle(num_phase_ispec_crust_mantle,2),&
+          stat=ier)
+  if( ier /= 0 ) &
+    call exit_mpi(myrank,'error allocating array phase_ispec_inner_crust_mantle')
+    
+  if(num_phase_ispec_crust_mantle > 0 ) read(IIN) phase_ispec_inner_crust_mantle
 
-  ! checks number of interface points
-  i = 0
-  do iglob=1,NGLOB_CRUST_MANTLE
-    ! only counts flags with MPI contributions
-    if( test_flag_vector(1,iglob) > myrank+1.0+0.5 ) i = i + 1
-  enddo
-  deallocate(test_flag_vector)
-  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+  ! mesh coloring for GPUs
+  if( USE_MESH_COLORING_GPU ) then
+    ! colors
+    read(IIN) num_colors_outer_crust_mantle,num_colors_inner_crust_mantle
 
-  ! points defined by interfaces
-  if( myrank == 0 ) then
-    write(IMAIN,*) '  total assembled MPI interface points:',inum
-    write(IMAIN,*)
+    allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + num_colors_inner_crust_mantle), &
+            stat=ier)
+    if( ier /= 0 ) &
+      call exit_mpi(myrank,'error allocating num_elem_colors_crust_mantle array')
 
-    ! checks
-    if( inum /= icount ) then
-      print*,'error crust mantle : total mpi points:',myrank,'total: ',inum,icount
-      call exit_mpi(myrank,'error MPI assembly crust mantle')
-    endif
+    read(IIN) num_elem_colors_crust_mantle
+  else
+    ! allocates dummy arrays
+    num_colors_outer_crust_mantle = 0
+    num_colors_inner_crust_mantle = 0
+    allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + num_colors_inner_crust_mantle), &
+            stat=ier)
+    if( ier /= 0 ) &
+      call exit_mpi(myrank,'error allocating num_elem_colors_crust_mantle array')
   endif
+  
+  close(IIN)
+  
+  end subroutine read_mesh_databases_MPI_CM
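The reader above expects solver_data_mpi.bin to be an unformatted sequential file whose records appear in a fixed order. The following writer-side sketch simply mirrors that order; the routine name, unit number and argument list are illustrative only, since the actual mesher-side writer is not part of this diff:

  subroutine write_solver_data_mpi_sketch(filename,num_interfaces,max_nibool, &
                                          my_neighbours,nibool_interfaces,ibool_interfaces, &
                                          nspec_inner,nspec_outer,num_phase_ispec,phase_ispec_inner)
  implicit none
  character(len=*),intent(in) :: filename
  integer,intent(in) :: num_interfaces,max_nibool
  integer,intent(in) :: my_neighbours(num_interfaces),nibool_interfaces(num_interfaces)
  integer,intent(in) :: ibool_interfaces(max_nibool,num_interfaces)
  integer,intent(in) :: nspec_inner,nspec_outer,num_phase_ispec
  integer,intent(in) :: phase_ispec_inner(num_phase_ispec,2)
  integer,parameter :: IOUT_SKETCH = 56   ! illustrative unit number

  open(unit=IOUT_SKETCH,file=filename,status='unknown',action='write',form='unformatted')

  ! MPI interfaces
  write(IOUT_SKETCH) num_interfaces
  if( num_interfaces > 0 ) then
    write(IOUT_SKETCH) max_nibool
    write(IOUT_SKETCH) my_neighbours
    write(IOUT_SKETCH) nibool_interfaces
    write(IOUT_SKETCH) ibool_interfaces
  endif

  ! inner / outer elements
  write(IOUT_SKETCH) nspec_inner,nspec_outer
  write(IOUT_SKETCH) num_phase_ispec
  if( num_phase_ispec > 0 ) write(IOUT_SKETCH) phase_ispec_inner

  ! mesh-coloring records would follow here when USE_MESH_COLORING_GPU is enabled

  close(IOUT_SKETCH)

  end subroutine write_solver_data_mpi_sketch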
 
-  call sync_all()
 
-  end subroutine rmd_test_MPI_cm
-
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine rmd_test_MPI_oc()
-
+  subroutine read_mesh_databases_MPI_OC()
+  
   use specfem_par
-  use specfem_par_crustmantle
-  use specfem_par_outercore
-  use specfem_par_innercore
+  use specfem_par_outercore  
   implicit none
 
-  include 'mpif.h'
-
   ! local parameters
-  real(kind=CUSTOM_REAL),dimension(:),allocatable :: test_flag
-  integer :: i,j,iglob,ier
-  integer :: inum,icount
+  integer :: ier
 
-  ! outer core
-  allocate(test_flag(NGLOB_OUTER_CORE),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array test_flag etc.'
+  ! outer core region
+  
+  ! create the name for the database of the current slice and region
+  call create_name_database(prname,myrank,IREGION_OUTER_CORE,LOCAL_PATH)
+  
+  open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
+       status='old',action='read',form='unformatted',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_mpi.bin')
 
-  ! points defined by interfaces
-  test_flag = 0.0
-  do i=1,num_interfaces_outer_core
-    do j=1,nibool_interfaces_outer_core(i)
-      iglob = ibool_interfaces_outer_core(j,i)
-      test_flag(iglob) = 1.0_CUSTOM_REAL
-    enddo
-  enddo
-  i = sum(nibool_interfaces_outer_core)
-  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
-  i = nint( sum(test_flag) )
-  call MPI_REDUCE(i,icount,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
-  if( myrank == 0 ) then
-    write(IMAIN,*) '  total MPI interface points : ',inum
-    write(IMAIN,*) '  unique MPI interface points: ',icount
+  ! MPI interfaces
+  read(IIN) num_interfaces_outer_core
+  allocate(my_neighbours_outer_core(num_interfaces_outer_core), &
+          nibool_interfaces_outer_core(num_interfaces_outer_core), &
+          stat=ier)
+  if( ier /= 0 ) &
+    call exit_mpi(myrank,'error allocating array my_neighbours_outer_core etc.')
+  
+  if( num_interfaces_outer_core > 0 ) then
+    read(IIN) max_nibool_interfaces_outer_core
+    allocate(ibool_interfaces_outer_core(max_nibool_interfaces_outer_core,num_interfaces_outer_core), &
+            stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_outer_core')
+    
+    read(IIN) my_neighbours_outer_core
+    read(IIN) nibool_interfaces_outer_core
+    read(IIN) ibool_interfaces_outer_core
+  else
+    ! dummy array
+    max_nibool_interfaces_outer_core = 0
+    allocate(ibool_interfaces_outer_core(0,0),stat=ier)  
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array dummy ibool_interfaces_outer_core')    
   endif
 
-  ! initialized for assembly
-  test_flag = myrank + 1.0_CUSTOM_REAL
-  call sync_all()
+  ! inner / outer elements
+  read(IIN) nspec_inner_outer_core,nspec_outer_outer_core
+  read(IIN) num_phase_ispec_outer_core
+  if( num_phase_ispec_outer_core < 0 ) &
+    call exit_mpi(myrank,'error num_phase_ispec_outer_core is < zero')
 
-  ! adds contributions from different partitions to flag arrays
-  ! custom_real arrays
-  ! send
-  call assemble_MPI_scalar_ext_mesh_s(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
-                                test_flag, &
-                                buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
-                                num_interfaces_outer_core,max_nibool_interfaces_outer_core, &
-                                nibool_interfaces_outer_core,ibool_interfaces_outer_core,&
-                                my_neighbours_outer_core, &
-                                request_send_scalar_outer_core,request_recv_scalar_outer_core)
-  ! receive
-  call assemble_MPI_scalar_ext_mesh_w(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
-                                test_flag, &
-                                buffer_recv_scalar_outer_core,num_interfaces_outer_core,&
-                                max_nibool_interfaces_outer_core, &
-                                nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
-                                request_send_scalar_outer_core,request_recv_scalar_outer_core)
-  call sync_all()
+  allocate(phase_ispec_inner_outer_core(num_phase_ispec_outer_core,2),&
+          stat=ier)
+  if( ier /= 0 ) &
+    call exit_mpi(myrank,'error allocating array phase_ispec_inner_outer_core')
+    
+  if(num_phase_ispec_outer_core > 0 ) read(IIN) phase_ispec_inner_outer_core
 
-  ! checks number of interface points
-  i = 0
-  do iglob=1,NGLOB_OUTER_CORE
-    ! only counts flags with MPI contributions
-    if( test_flag(iglob) > myrank+1.0+0.5 ) i = i + 1
-  enddo
-  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
-  deallocate(test_flag)
+  ! mesh coloring for GPUs
+  if( USE_MESH_COLORING_GPU ) then
+    ! colors
+    read(IIN) num_colors_outer_outer_core,num_colors_inner_outer_core
 
-  ! output
-  if( myrank == 0 ) then
-    write(IMAIN,*) '  total assembled MPI interface points:',inum
-    write(IMAIN,*)
+    allocate(num_elem_colors_outer_core(num_colors_outer_outer_core + num_colors_inner_outer_core), &
+            stat=ier)
+    if( ier /= 0 ) &
+      call exit_mpi(myrank,'error allocating num_elem_colors_outer_core array')
 
-    ! checks
-    if( inum /= icount ) then
-      print*,'error outer core : total mpi points:',myrank,'total: ',inum,icount
-      call exit_mpi(myrank,'error MPI assembly outer_core')
-    endif
+    read(IIN) num_elem_colors_outer_core
+  else
+    ! allocates dummy arrays
+    num_colors_outer_outer_core = 0
+    num_colors_inner_outer_core = 0
+    allocate(num_elem_colors_outer_core(num_colors_outer_outer_core + num_colors_inner_outer_core), &
+            stat=ier)
+    if( ier /= 0 ) &
+      call exit_mpi(myrank,'error allocating num_elem_colors_outer_core array')
   endif
+  
+  close(IIN)
+  
+  end subroutine read_mesh_databases_MPI_OC
 
-  call sync_all()
-
-  end subroutine rmd_test_MPI_oc
-
-
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine rmd_test_MPI_ic()
-
+  subroutine read_mesh_databases_MPI_IC()
+  
   use specfem_par
-  use specfem_par_crustmantle
-  use specfem_par_outercore
   use specfem_par_innercore
   implicit none
 
-  include 'mpif.h'
-
   ! local parameters
-  real(kind=CUSTOM_REAL),dimension(:,:),allocatable :: test_flag_vector
-  integer :: i,j,iglob,ier
-  integer :: inum,icount
+  integer :: ier
 
-  ! inner core
-  allocate(test_flag_vector(NDIM,NGLOB_INNER_CORE),stat=ier)
-  if( ier /= 0 ) stop 'error allocating array test_flag etc.'
+  ! inner core region
+  
+  ! create the name for the database of the current slice and region
+  call create_name_database(prname,myrank,IREGION_INNER_CORE,LOCAL_PATH)
+  
+  open(unit=IIN,file=prname(1:len_trim(prname))//'solver_data_mpi.bin', &
+       status='old',action='read',form='unformatted',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening solver_data_mpi.bin')
 
-  ! points defined by interfaces
-  test_flag_vector = 0.0
-  do i=1,num_interfaces_inner_core
-    do j=1,nibool_interfaces_inner_core(i)
-      iglob = ibool_interfaces_inner_core(j,i)
-      test_flag_vector(1,iglob) = 1.0_CUSTOM_REAL
-    enddo
-  enddo
-  i = sum(nibool_interfaces_inner_core)
-  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
-  i = nint( sum(test_flag_vector) )
-  call MPI_REDUCE(i,icount,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
-  if( myrank == 0 ) then
-    write(IMAIN,*) '  total MPI interface points : ',inum
-    write(IMAIN,*) '  unique MPI interface points: ',icount
+  ! MPI interfaces
+  read(IIN) num_interfaces_inner_core
+  allocate(my_neighbours_inner_core(num_interfaces_inner_core), &
+          nibool_interfaces_inner_core(num_interfaces_inner_core), &
+          stat=ier)
+  if( ier /= 0 ) &
+    call exit_mpi(myrank,'error allocating array my_neighbours_inner_core etc.')
+  
+  if( num_interfaces_inner_core > 0 ) then
+    read(IIN) max_nibool_interfaces_inner_core
+    allocate(ibool_interfaces_inner_core(max_nibool_interfaces_inner_core,num_interfaces_inner_core), &
+            stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array ibool_interfaces_inner_core')
+    
+    read(IIN) my_neighbours_inner_core
+    read(IIN) nibool_interfaces_inner_core
+    read(IIN) ibool_interfaces_inner_core
+  else
+    ! dummy array
+    max_nibool_interfaces_inner_core = 0
+    allocate(ibool_interfaces_inner_core(0,0),stat=ier)  
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating array dummy ibool_interfaces_inner_core')    
   endif
 
-  ! initializes for assembly
-  test_flag_vector = myrank + 1.0_CUSTOM_REAL
-  call sync_all()
+  ! inner / outer elements
+  read(IIN) nspec_inner_inner_core,nspec_outer_inner_core
+  read(IIN) num_phase_ispec_inner_core
+  if( num_phase_ispec_inner_core < 0 ) &
+    call exit_mpi(myrank,'error num_phase_ispec_inner_core is < zero')
 
-  ! adds contributions from different partitions to flag arrays
-  ! custom_real arrays
-  ! send
-  call assemble_MPI_vector_ext_mesh_s(NPROCTOT_VAL,NGLOB_INNER_CORE, &
-                      test_flag_vector, &
-                      buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
-                      num_interfaces_inner_core,max_nibool_interfaces_inner_core, &
-                      nibool_interfaces_inner_core,ibool_interfaces_inner_core,&
-                      my_neighbours_inner_core, &
-                      request_send_vector_inner_core,request_recv_vector_inner_core)
-  ! receive
-  call assemble_MPI_vector_ext_mesh_w(NPROCTOT_VAL,NGLOB_INNER_CORE, &
-                              test_flag_vector, &
-                              buffer_recv_vector_inner_core,num_interfaces_inner_core,&
-                              max_nibool_interfaces_inner_core, &
-                              nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
-                              request_send_vector_inner_core,request_recv_vector_inner_core)
-  call sync_all()
+  allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2),&
+          stat=ier)
+  if( ier /= 0 ) &
+    call exit_mpi(myrank,'error allocating array phase_ispec_inner_inner_core')
+    
+  if(num_phase_ispec_inner_core > 0 ) read(IIN) phase_ispec_inner_inner_core
 
-  ! checks number of interface points
-  i = 0
-  do iglob=1,NGLOB_INNER_CORE
-    ! only counts flags with MPI contributions
-    if( test_flag_vector(1,iglob) > myrank+1.0+0.5 ) i = i + 1
-  enddo
-  deallocate(test_flag_vector)
-  call MPI_REDUCE(i,inum,1,MPI_INTEGER,MPI_SUM,0,MPI_COMM_WORLD,ier)
+  ! mesh coloring for GPUs
+  if( USE_MESH_COLORING_GPU ) then
+    ! colors
+    read(IIN) num_colors_outer_inner_core,num_colors_inner_inner_core
 
-  if( myrank == 0 ) then
-    write(IMAIN,*) '  total assembled MPI interface points:',inum
-    write(IMAIN,*)
+    allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + num_colors_inner_inner_core), &
+            stat=ier)
+    if( ier /= 0 ) &
+      call exit_mpi(myrank,'error allocating num_elem_colors_inner_core array')
 
-    ! checks
-    if( inum /= icount ) then
-      print*,'error inner core : total mpi points:',myrank,'total: ',inum,icount
-      call exit_mpi(myrank,'error MPI assembly inner core')
-    endif
+    read(IIN) num_elem_colors_inner_core
+  else
+    ! allocates dummy arrays
+    num_colors_outer_inner_core = 0
+    num_colors_inner_inner_core = 0
+    allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + num_colors_inner_inner_core), &
+            stat=ier)
+    if( ier /= 0 ) &
+      call exit_mpi(myrank,'error allocating num_elem_colors_inner_core array')
   endif
+  
+  close(IIN)
+  
+  end subroutine read_mesh_databases_MPI_IC
 
-  call sync_all()
 
-  end subroutine rmd_test_MPI_ic
-
 !
 !-------------------------------------------------------------------------------------------------
 !
 
-  subroutine read_mesh_databases_InnerOuter()
-
-! sets up inner/outer elements for non-blocking MPI communication
-
-  use specfem_par
-  use specfem_par_crustmantle
-  use specfem_par_innercore
-  use specfem_par_outercore
-  implicit none
-
-  ! local parameters
-  real :: percentage_edge
-  integer :: ier,ispec,iinner,iouter
-  !character(len=150) :: filename
-
-  ! stores inner / outer elements
-  !
-  ! note: arrays is_on_a_slice_edge_.. have flags set for elements which need to
-  !         communicate with other MPI processes
-
-  ! crust_mantle
-  nspec_outer_crust_mantle = count( is_on_a_slice_edge_crust_mantle )
-  nspec_inner_crust_mantle = NSPEC_CRUST_MANTLE - nspec_outer_crust_mantle
-
-  num_phase_ispec_crust_mantle = max(nspec_inner_crust_mantle,nspec_outer_crust_mantle)
-
-  allocate(phase_ispec_inner_crust_mantle(num_phase_ispec_crust_mantle,2),stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_crust_mantle')
-
-  phase_ispec_inner_crust_mantle(:,:) = 0
-  iinner = 0
-  iouter = 0
-  do ispec=1,NSPEC_CRUST_MANTLE
-    if( is_on_a_slice_edge_crust_mantle(ispec) ) then
-      ! outer element
-      iouter = iouter + 1
-      phase_ispec_inner_crust_mantle(iouter,1) = ispec
-    else
-      ! inner element
-      iinner = iinner + 1
-      phase_ispec_inner_crust_mantle(iinner,2) = ispec
-    endif
-  enddo
-
-  ! outer_core
-  nspec_outer_outer_core = count( is_on_a_slice_edge_outer_core )
-  nspec_inner_outer_core = NSPEC_OUTER_CORE - nspec_outer_outer_core
-
-  num_phase_ispec_outer_core = max(nspec_inner_outer_core,nspec_outer_outer_core)
-
-  allocate(phase_ispec_inner_outer_core(num_phase_ispec_outer_core,2),stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_outer_core')
-
-  phase_ispec_inner_outer_core(:,:) = 0
-  iinner = 0
-  iouter = 0
-  do ispec=1,NSPEC_OUTER_CORE
-    if( is_on_a_slice_edge_outer_core(ispec) ) then
-      ! outer element
-      iouter = iouter + 1
-      phase_ispec_inner_outer_core(iouter,1) = ispec
-    else
-      ! inner element
-      iinner = iinner + 1
-      phase_ispec_inner_outer_core(iinner,2) = ispec
-    endif
-  enddo
-
-  ! inner_core
-  nspec_outer_inner_core = count( is_on_a_slice_edge_inner_core )
-  nspec_inner_inner_core = NSPEC_INNER_CORE - nspec_outer_inner_core
-
-  num_phase_ispec_inner_core = max(nspec_inner_inner_core,nspec_outer_inner_core)
-
-  allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2),stat=ier)
-  if( ier /= 0 ) call exit_mpi(myrank,'error allocating array phase_ispec_inner_inner_core')
-
-  phase_ispec_inner_inner_core(:,:) = 0
-  iinner = 0
-  iouter = 0
-  do ispec=1,NSPEC_INNER_CORE
-    if( is_on_a_slice_edge_inner_core(ispec) ) then
-      ! outer element
-      iouter = iouter + 1
-      phase_ispec_inner_inner_core(iouter,1) = ispec
-    else
-      ! inner element
-      iinner = iinner + 1
-      phase_ispec_inner_inner_core(iinner,2) = ispec
-    endif
-  enddo
-
-  ! user output
-  if(myrank == 0) then
-
-    write(IMAIN,*) 'for overlapping of communications with calculations:'
-    write(IMAIN,*)
-
-    percentage_edge = 100.0 - 100. * nspec_outer_crust_mantle / real(NSPEC_CRUST_MANTLE)
-    write(IMAIN,*) 'percentage of edge elements in crust/mantle ',percentage_edge,'%'
-    write(IMAIN,*) 'percentage of volume elements in crust/mantle ',100. - percentage_edge,'%'
-    write(IMAIN,*)
-
-    percentage_edge = 100.0 - 100.* nspec_outer_outer_core / real(NSPEC_OUTER_CORE)
-    write(IMAIN,*) 'percentage of edge elements in outer core ',percentage_edge,'%'
-    write(IMAIN,*) 'percentage of volume elements in outer core ',100. - percentage_edge,'%'
-    write(IMAIN,*)
-
-    percentage_edge = 100.0 - 100. * nspec_outer_inner_core / real(NSPEC_INNER_CORE)
-    write(IMAIN,*) 'percentage of edge elements in inner core ',percentage_edge,'%'
-    write(IMAIN,*) 'percentage of volume elements in inner core ',100. - percentage_edge,'%'
-    write(IMAIN,*)
-
-  endif
-
-  ! debug: saves element flags
-  ! crust mantle
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_crust_mantle_proc',myrank
-  !call write_VTK_data_elem_l(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
-  !                          xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
-  !                          ibool_crust_mantle, &
-  !                          is_on_a_slice_edge_crust_mantle,filename)
-  ! outer core
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_outer_core_proc',myrank
-  !call write_VTK_data_elem_l(NSPEC_OUTER_CORE,NGLOB_OUTER_CORE, &
-  !                          xstore_outer_core,ystore_outer_core,zstore_outer_core, &
-  !                          ibool_outer_core, &
-  !                          is_on_a_slice_edge_outer_core,filename)
-  ! inner core
-  !write(filename,'(a,i6.6)') trim(OUTPUT_FILES)//'/MPI_innerouter_inner_core_proc',myrank
-  !call write_VTK_data_elem_l(NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-  !                          xstore_inner_core,ystore_inner_core,zstore_inner_core, &
-  !                          ibool_inner_core, &
-  !                          is_on_a_slice_edge_inner_core,filename)
-
-  end subroutine read_mesh_databases_InnerOuter
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
   subroutine read_mesh_databases_stacey()
 
   use specfem_par
@@ -2514,14 +1044,6 @@
     filesize = filesize*NSTEP
 
     if (SIMULATION_TYPE == 3) then
-!      open(unit=51,file=trim(prname)//'absorb_xmin.bin', &
-!            status='old',action='read',form='unformatted',access='direct', &
-!            recl=reclen_xmin_crust_mantle+2*4)
-!    else
-!      open(unit=51,file=trim(prname)//'absorb_xmin.bin', &
-!            status='unknown',form='unformatted',access='direct',&
-!            recl=reclen_xmin_crust_mantle+2*4)
-
       call open_file_abs_r(0,trim(prname)//'absorb_xmin.bin',len_trim(trim(prname)//'absorb_xmin.bin'), &
                           filesize)
     else
@@ -2541,14 +1063,6 @@
     filesize = filesize*NSTEP
 
     if (SIMULATION_TYPE == 3) then
-!      open(unit=52,file=trim(prname)//'absorb_xmax.bin', &
-!            status='old',action='read',form='unformatted',access='direct', &
-!            recl=reclen_xmax_crust_mantle+2*4)
-!    else
-!      open(unit=52,file=trim(prname)//'absorb_xmax.bin', &
-!            status='unknown',form='unformatted',access='direct', &
-!            recl=reclen_xmax_crust_mantle+2*4)
-
       call open_file_abs_r(1,trim(prname)//'absorb_xmax.bin',len_trim(trim(prname)//'absorb_xmax.bin'), &
                           filesize)
     else
@@ -2569,14 +1083,6 @@
 
 
     if (SIMULATION_TYPE == 3) then
-!      open(unit=53,file=trim(prname)//'absorb_ymin.bin', &
-!            status='old',action='read',form='unformatted',access='direct',&
-!            recl=reclen_ymin_crust_mantle+2*4)
-!    else
-!      open(unit=53,file=trim(prname)//'absorb_ymin.bin', &
-!            status='unknown',form='unformatted',access='direct',&
-!            recl=reclen_ymin_crust_mantle+2*4)
-
       call open_file_abs_r(2,trim(prname)//'absorb_ymin.bin',len_trim(trim(prname)//'absorb_ymin.bin'), &
                           filesize)
     else
@@ -2596,14 +1102,6 @@
     filesize = filesize*NSTEP
 
     if (SIMULATION_TYPE == 3) then
-!      open(unit=54,file=trim(prname)//'absorb_ymax.bin', &
-!            status='old',action='read',form='unformatted',access='direct',&
-!            recl=reclen_ymax_crust_mantle+2*4)
-!    else
-!      open(unit=54,file=trim(prname)//'absorb_ymax.bin', &
-!            status='unknown',form='unformatted',access='direct',&
-!            recl=reclen_ymax_crust_mantle+2*4)
-
       call open_file_abs_r(3,trim(prname)//'absorb_ymax.bin',len_trim(trim(prname)//'absorb_ymax.bin'), &
                           filesize)
     else
@@ -2640,14 +1138,6 @@
     filesize = filesize*NSTEP
 
     if (SIMULATION_TYPE == 3) then
-!      open(unit=61,file=trim(prname)//'absorb_xmin.bin', &
-!            status='old',action='read',form='unformatted',access='direct', &
-!            recl=reclen_xmin_outer_core+2*4)
-!    else
-!      open(unit=61,file=trim(prname)//'absorb_xmin.bin', &
-!            status='unknown',form='unformatted',access='direct',&
-!            recl=reclen_xmin_outer_core+2*4)
-
       call open_file_abs_r(4,trim(prname)//'absorb_xmin.bin',len_trim(trim(prname)//'absorb_xmin.bin'), &
                           filesize)
     else
@@ -2667,14 +1157,6 @@
     filesize = filesize*NSTEP
 
     if (SIMULATION_TYPE == 3) then
-!      open(unit=62,file=trim(prname)//'absorb_xmax.bin', &
-!            status='old',action='read',form='unformatted',access='direct', &
-!            recl=reclen_xmax_outer_core+2*4)
-!    else
-!      open(unit=62,file=trim(prname)//'absorb_xmax.bin', &
-!            status='unknown',form='unformatted',access='direct', &
-!            recl=reclen_xmax_outer_core+2*4)
-
       call open_file_abs_r(5,trim(prname)//'absorb_xmax.bin',len_trim(trim(prname)//'absorb_xmax.bin'), &
                           filesize)
     else
@@ -2695,14 +1177,6 @@
     filesize = filesize*NSTEP
 
     if (SIMULATION_TYPE == 3) then
-!      open(unit=63,file=trim(prname)//'absorb_ymin.bin', &
-!            status='old',action='read',form='unformatted',access='direct',&
-!            recl=reclen_ymin_outer_core+2*4)
-!    else
-!      open(unit=63,file=trim(prname)//'absorb_ymin.bin', &
-!            status='unknown',form='unformatted',access='direct',&
-!            recl=reclen_ymin_outer_core+2*4)
-
       call open_file_abs_r(6,trim(prname)//'absorb_ymin.bin',len_trim(trim(prname)//'absorb_ymin.bin'), &
                           filesize)
     else
@@ -2723,14 +1197,6 @@
     filesize = filesize*NSTEP
 
     if (SIMULATION_TYPE == 3) then
-!      open(unit=64,file=trim(prname)//'absorb_ymax.bin', &
-!            status='old',action='read',form='unformatted',access='direct',&
-!            recl=reclen_ymax_outer_core+2*4)
-!    else
-!      open(unit=64,file=trim(prname)//'absorb_ymax.bin', &
-!            status='unknown',form='unformatted',access='direct',&
-!            recl=reclen_ymax_outer_core+2*4)
-
       call open_file_abs_r(7,trim(prname)//'absorb_ymax.bin',len_trim(trim(prname)//'absorb_ymax.bin'), &
                           filesize)
     else
@@ -2751,14 +1217,6 @@
     filesize = filesize*NSTEP
 
     if (SIMULATION_TYPE == 3) then
-!      open(unit=65,file=trim(prname)//'absorb_zmin.bin', &
-!            status='old',action='read',form='unformatted',access='direct',&
-!            recl=reclen_zmin+2*4)
-!    else
-!      open(unit=65,file=trim(prname)//'absorb_zmin.bin', &
-!            status='unknown',form='unformatted',access='direct',&
-!            recl=reclen_zmin+2*4)
-
       call open_file_abs_r(8,trim(prname)//'absorb_zmin.bin',len_trim(trim(prname)//'absorb_zmin.bin'), &
                           filesize)
     else
@@ -2769,343 +1227,3 @@
 
   end subroutine read_mesh_databases_stacey
 
-
-!
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!-------------------------------------------------------------------------------------------------
-!
-
-!daniel: TODO - place this auxiliary function...
-
-  subroutine heap_sort( N, array )
-
-! heap sort algorithm
-! sorts integer array (in increasing order, like 1 - 5 - 6 - 9 - 12 - 13 - 14 -...)
-
-  implicit none
-  integer,intent(in) :: N
-  integer,dimension(N),intent(inout) :: array
-
-  ! local parameters
-  integer :: tmp
-  integer :: i
-
-  ! checks if anything to do
-  if( N < 2 ) return
-
-  ! builds heap
-  do i = N/2, 1, -1
-    call heap_sort_siftdown(N,array,i,N)
-  enddo
-
-  ! sorts array
-  do i = N, 2, -1
-    ! swaps last and first entry in this section
-    tmp = array(1)
-    array(1) = array(i)
-    array(i) = tmp
-    call heap_sort_siftdown(N,array,1,i-1)
-  enddo
-
-  end subroutine heap_sort
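A minimal driver for the sort above (compiled together with the routine; the array contents are arbitrary):

  program heap_sort_example
  implicit none
  integer, parameter :: N = 6
  integer :: array(N)

  array = (/ 13, 5, 9, 1, 14, 6 /)
  call heap_sort(N, array)   ! sorts in place, increasing order
  print *, array             ! prints: 1 5 6 9 13 14

  end program heap_sort_example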
-
-!
-!----
-!
-
-  subroutine heap_sort_siftdown(N,array,start,bottom)
-
-  implicit none
-
-  integer,intent(in):: N
-  integer,dimension(N),intent(inout) :: array
-  integer :: start,bottom
-
-  ! local parameters
-  integer :: i,j
-  integer :: tmp
-
-  i = start
-  tmp = array(i)
-  j = 2*i
-  do while( j <= bottom )
-    ! chooses larger value first in this section
-    if( j < bottom ) then
-      if( array(j) <= array(j+1) ) j = j + 1
-    endif
-
-    ! checks if section already smaller than inital value
-    if( array(j) < tmp ) exit
-
-    array(i) = array(j)
-    i = j
-    j = 2*i
-  enddo
-
-  array(i) = tmp
-  return
-
-  end subroutine heap_sort_siftdown
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-!daniel: TODO - place this auxiliary function...
-
-! external mesh routine for saving vtk files for points locations
-
-  subroutine write_VTK_data_points(nglob, &
-                                  xstore_dummy,ystore_dummy,zstore_dummy, &
-                                  points_globalindices,num_points_globalindices, &
-                                  prname_file)
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: nglob
-
-! global coordinates
-  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
-
-! gll data values array
-  integer :: num_points_globalindices
-  integer, dimension(num_points_globalindices) :: points_globalindices
-
-! file name
-  character(len=150) prname_file
-
-  integer :: i,iglob
-
-! write source and receiver VTK files for Paraview
-  !debug
-  !write(IMAIN,*) '  vtk file: '
-  !write(IMAIN,*) '    ',prname_file(1:len_trim(prname_file))//'.vtk'
-
-  open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
-  write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
-  write(IOVTK,'(a)') 'material model VTK file'
-  write(IOVTK,'(a)') 'ASCII'
-  write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
-  write(IOVTK, '(a,i12,a)') 'POINTS ', num_points_globalindices, ' float'
-  do i=1,num_points_globalindices
-    iglob = points_globalindices(i)
-    if( iglob <= 0 .or. iglob > nglob ) then
-      print*,'error: '//prname_file(1:len_trim(prname_file))//'.vtk'
-      print*,'error global index: ',iglob,i
-      close(IOVTK)
-      stop 'error vtk points file'
-    endif
-
-    write(IOVTK,'(3e18.6)') xstore_dummy(iglob),ystore_dummy(iglob),zstore_dummy(iglob)
-  enddo
-  write(IOVTK,*) ""
-
-  close(IOVTK)
-
-  end subroutine write_VTK_data_points
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! external mesh routine for saving vtk files for points locations
-
-  subroutine write_VTK_glob_points(nglob, &
-                                  xstore_dummy,ystore_dummy,zstore_dummy, &
-                                  glob_values, &
-                                  prname_file)
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: nglob
-
-  ! global coordinates
-  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
-
-  ! gll data values array
-  real(kind=CUSTOM_REAL), dimension(nglob) :: glob_values
-
-  ! file name
-  character(len=150) prname_file
-
-  integer :: iglob
-
-  ! write source and receiver VTK files for Paraview
-  !debug
-  !write(IMAIN,*) '  vtk file: '
-  !write(IMAIN,*) '    ',prname_file(1:len_trim(prname_file))//'.vtk'
-
-  open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
-  write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
-  write(IOVTK,'(a)') 'material model VTK file'
-  write(IOVTK,'(a)') 'ASCII'
-  write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
-  write(IOVTK, '(a,i12,a)') 'POINTS ', nglob, ' float'
-  do iglob=1,nglob
-    write(IOVTK,*) xstore_dummy(iglob),ystore_dummy(iglob),zstore_dummy(iglob)
-  enddo
-  write(IOVTK,*) ""
-
-  ! writes out gll-data (velocity) for each element point
-  write(IOVTK,'(a,i12)') "POINT_DATA ",nglob
-  write(IOVTK,'(a)') "SCALARS glob_data float"
-  write(IOVTK,'(a)') "LOOKUP_TABLE default"
-  do iglob=1,nglob
-    write(IOVTK,*) glob_values(iglob)
-  enddo
-  write(IOVTK,*) ""
-
-  close(IOVTK)
-
-  end subroutine write_VTK_glob_points
-
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! routine for saving vtk file holding logical flag on each spectral element
-
-  subroutine write_VTK_data_elem_l(nspec,nglob, &
-                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-                        elem_flag,prname_file)
-
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: nspec,nglob
-
-! global coordinates
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
-
-! element flag array
-  logical, dimension(nspec) :: elem_flag
-  integer :: ispec,i
-
-! file name
-  character(len=150) prname_file
-
-! write source and receiver VTK files for Paraview
-  !debug
-  !write(IMAIN,*) '  vtk file: '
-  !write(IMAIN,*) '    ',prname_file(1:len_trim(prname_file))//'.vtk'
-
-  open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
-  write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
-  write(IOVTK,'(a)') 'material model VTK file'
-  write(IOVTK,'(a)') 'ASCII'
-  write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
-  write(IOVTK, '(a,i12,a)') 'POINTS ', nglob, ' float'
-  do i=1,nglob
-    write(IOVTK,'(3e18.6)') xstore_dummy(i),ystore_dummy(i),zstore_dummy(i)
-  enddo
-  write(IOVTK,*) ""
-
-  ! note: indices for vtk start at 0
-  write(IOVTK,'(a,i12,i12)') "CELLS ",nspec,nspec*9
-  do ispec=1,nspec
-    write(IOVTK,'(9i12)') 8,ibool(1,1,1,ispec)-1,ibool(NGLLX,1,1,ispec)-1,ibool(NGLLX,NGLLY,1,ispec)-1,ibool(1,NGLLY,1,ispec)-1,&
-          ibool(1,1,NGLLZ,ispec)-1,ibool(NGLLX,1,NGLLZ,ispec)-1,ibool(NGLLX,NGLLY,NGLLZ,ispec)-1,ibool(1,NGLLY,NGLLZ,ispec)-1
-  enddo
-  write(IOVTK,*) ""
-
-  ! type: hexahedrons
-  write(IOVTK,'(a,i12)') "CELL_TYPES ",nspec
-  write(IOVTK,*) (12,ispec=1,nspec)
-  write(IOVTK,*) ""
-
-  write(IOVTK,'(a,i12)') "CELL_DATA ",nspec
-  write(IOVTK,'(a)') "SCALARS elem_flag integer"
-  write(IOVTK,'(a)') "LOOKUP_TABLE default"
-  do ispec = 1,nspec
-    if( elem_flag(ispec) .eqv. .true. ) then
-      write(IOVTK,*) 1
-    else
-      write(IOVTK,*) 0
-    endif
-  enddo
-  write(IOVTK,*) ""
-  close(IOVTK)
-
-
-  end subroutine write_VTK_data_elem_l
-
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! routine for saving vtk file holding integer value on each spectral element
-
-  subroutine write_VTK_data_elem_i(nspec,nglob, &
-                        xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-                        elem_flag,prname_file)
-
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: nspec,nglob
-
-! global coordinates
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
-
-! element flag array
-  integer, dimension(nspec) :: elem_flag
-  integer :: ispec,i
-
-! file name
-  character(len=150) prname_file
-
-! write source and receiver VTK files for Paraview
-  !debug
-  !write(IMAIN,*) '  vtk file: '
-  !write(IMAIN,*) '    ',prname_file(1:len_trim(prname_file))//'.vtk'
-
-  open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
-  write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
-  write(IOVTK,'(a)') 'material model VTK file'
-  write(IOVTK,'(a)') 'ASCII'
-  write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
-  write(IOVTK, '(a,i12,a)') 'POINTS ', nglob, ' float'
-  do i=1,nglob
-    write(IOVTK,'(3e18.6)') xstore_dummy(i),ystore_dummy(i),zstore_dummy(i)
-  enddo
-  write(IOVTK,*) ""
-
-  ! note: indices for vtk start at 0
-  write(IOVTK,'(a,i12,i12)') "CELLS ",nspec,nspec*9
-  do ispec=1,nspec
-    write(IOVTK,'(9i12)') 8,ibool(1,1,1,ispec)-1,ibool(NGLLX,1,1,ispec)-1,ibool(NGLLX,NGLLY,1,ispec)-1,ibool(1,NGLLY,1,ispec)-1,&
-          ibool(1,1,NGLLZ,ispec)-1,ibool(NGLLX,1,NGLLZ,ispec)-1,ibool(NGLLX,NGLLY,NGLLZ,ispec)-1,ibool(1,NGLLY,NGLLZ,ispec)-1
-  enddo
-  write(IOVTK,*) ""
-
-  ! type: hexahedrons
-  write(IOVTK,'(a,i12)') "CELL_TYPES ",nspec
-  write(IOVTK,*) (12,ispec=1,nspec)
-  write(IOVTK,*) ""
-
-  write(IOVTK,'(a,i12)') "CELL_DATA ",nspec
-  write(IOVTK,'(a)') "SCALARS elem_val integer"
-  write(IOVTK,'(a)') "LOOKUP_TABLE default"
-  do ispec = 1,nspec
-    write(IOVTK,*) elem_flag(ispec)
-  enddo
-  write(IOVTK,*) ""
-  close(IOVTK)
-
-
-  end subroutine write_VTK_data_elem_i
-

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_topography_bathymetry.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -48,18 +48,25 @@
 
   ! read topography and bathymetry file
   if( TOPOGRAPHY ) then
+    ! initializes
+    ibathy_topo(:,:) = 0
+
     ! master reads file
     if(myrank == 0 ) then
+      ! user output
+      write(IMAIN,*) 'topography:'    
+
+      ! reads topo file
       call read_topo_bathy_database(ibathy_topo,LOCAL_PATH)
     endif
-
+    
     ! broadcast the information read on the master to the nodes
     call MPI_BCAST(ibathy_topo,NX_BATHY*NY_BATHY,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
   endif
 
   ! user output
   call sync_all()
-  if( myrank == 0 .and. (TOPOGRAPHY .or. OCEANS_VAL)) then
+  if( myrank == 0 .and. (TOPOGRAPHY .or. OCEANS_VAL .or. ELLIPTICITY_VAL)) then
     ! elapsed time since beginning of mesh generation
     tCPU = MPI_WTIME() - time_start
     write(IMAIN,*)

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/recompute_jacobian.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/recompute_jacobian.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/recompute_jacobian.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -28,7 +28,7 @@
 ! recompute 3D jacobian at a given point for 27-node elements
 
   subroutine recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z, &
-                   xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+                                xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
 
   implicit none
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/setup_sources_receivers.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -52,6 +52,7 @@
     write(IMAIN,*)
     if(NSOURCES > 1) write(IMAIN,*) 'Using ',NSOURCES,' point sources'
   endif
+  call sync_all()
 
   end subroutine setup_sources_receivers
 
@@ -73,7 +74,7 @@
   integer :: ier
 
   ! makes smaller hdur for movies
-  logical,parameter :: USE_SMALLER_HDUR_MOVIE = .false.
+  logical,parameter :: USE_SMALLER_HDUR_MOVIE = .true.
 
   ! allocate arrays for source
   allocate(islice_selected_source(NSOURCES), &
@@ -103,6 +104,8 @@
   allocate(nu_source(NDIM,NDIM,NSOURCES),stat=ier)
   if( ier /= 0 ) call exit_MPI(myrank,'error allocating source arrays')
 
+  call sync_all()
+
   ! sources
   ! BS BS moved open statement and writing of first lines into sr.vtk before the
   ! call to locate_sources, where further write statements to that file follow
@@ -229,6 +232,8 @@
                               elat_SAC,elon_SAC,depth_SAC,mb_SAC,cmt_lat_SAC,&
                               cmt_lon_SAC,cmt_depth_SAC,cmt_hdur_SAC,NSOURCES)
 
+  call sync_all()
+
   end subroutine setup_sources
 
 !
@@ -281,7 +286,8 @@
     endif
     write(IMAIN,*)
   endif
-
+  call sync_all()
+  
   ! locate receivers in the crust in the mesh
   call locate_receivers(NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE,ibool_crust_mantle, &
                       xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle, &
@@ -388,6 +394,8 @@
   ! frees arrays
   deallocate(theta_source,phi_source)
 
+  call sync_all()
+  
   end subroutine setup_receivers
 
 

Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/specfem3D_par.F90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -27,7 +27,7 @@
 !
 ! United States and French Government Sponsorship Acknowledged.
 
-module constants
+module constants_solver
 
   include "constants.h"
 
@@ -35,7 +35,7 @@
   ! done for performance only using static allocation to allow for loop unrolling
   include "OUTPUT_FILES/values_from_mesher.h"
 
-end module constants
+end module constants_solver
 
 !=====================================================================
 
@@ -43,7 +43,7 @@
 
 ! main parameter module for specfem simulations
 
-  use constants
+  use constants_solver
 
   implicit none
 
@@ -254,10 +254,6 @@
   logical, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: this_region_has_a_doubling
   double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: rmins,rmaxs
 
-  ! temporary arrays for elements on slices or edges
-  logical, dimension(:),allocatable :: is_on_a_slice_edge_crust_mantle, &
-    is_on_a_slice_edge_inner_core,is_on_a_slice_edge_outer_core
-
   !-----------------------------------------------------------------
   ! MPI partitions
   !-----------------------------------------------------------------
@@ -277,60 +273,6 @@
   ! assembly
   !-----------------------------------------------------------------
 
-  ! ---- arrays to assemble between chunks
-  ! communication pattern for faces between chunks
-  integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces,imsg_type
-  ! communication pattern for corners between chunks
-  integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
-
-  ! indirect addressing for each message for faces and corners of the chunks
-  ! a given slice can belong to at most one corner and at most two faces
-  integer :: NGLOB2DMAX_XY
-
-  ! this for non blocking MPI
-
-  ! buffers for send and receive between faces of the slices and the chunks
-  ! we use the same buffers to assemble scalars and vectors because vectors are
-  ! always three times bigger and therefore scalars can use the first part
-  ! of the vector buffer in memory even if it has an additional index here
-  integer :: npoin2D_max_all_CM_IC
-  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_faces,buffer_received_faces, &
-                                                           b_buffer_send_faces,b_buffer_received_faces
-
-  ! buffers for send and receive between corners of the chunks
-  real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL_CM) :: &
-    buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
-    b_buffer_send_chunkcorn_scalar,b_buffer_recv_chunkcorn_scalar
-
-  ! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
-  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
-     buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
-     b_buffer_send_chunkcorn_vector,b_buffer_recv_chunkcorn_vector
-
-  ! request ids for non-blocking MPI
-  integer :: request_send,request_receive
-  integer, dimension(NUMFACES_SHARED) :: request_send_array,request_receive_array
-  integer :: request_send_cc,request_receive_cc
-  integer, dimension(NPROC_XI_VAL+4) :: request_send_array_cc,request_receive_array_cc
-
-  integer :: b_request_send,b_request_receive
-  integer, dimension(NUMFACES_SHARED) :: b_request_send_array,b_request_receive_array
-  integer :: b_request_send_cc,b_request_receive_cc
-  integer, dimension(NPROC_XI_VAL+4) :: b_request_send_array_cc,b_request_receive_array_cc
-
-
-  logical, dimension(NGLOB_CRUST_MANTLE) :: mask_ibool
-
-  ! number of faces between chunks
-  integer :: NUMMSGS_FACES
-
-  ! number of corners between chunks
-  integer :: NCORNERSCHUNKS
-
-  ! number of message types
-  integer :: NUM_MSG_TYPES
-
-
   ! collected MPI interfaces
   ! MPI crust/mantle mesh
   integer :: num_interfaces_crust_mantle
@@ -368,6 +310,10 @@
   integer, dimension(:), allocatable :: request_send_scalar_outer_core,request_recv_scalar_outer_core
   integer, dimension(:), allocatable :: b_request_send_scalar_outer_core,b_request_recv_scalar_outer_core
 
+
+  ! temporary array
+  logical, dimension(NGLOB_CRUST_MANTLE) :: mask_ibool
+
   !-----------------------------------------------------------------
   ! gpu
   !-----------------------------------------------------------------
@@ -385,7 +331,7 @@
 
 ! parameter module for elastic solver in crust/mantle region
 
-  use constants
+  use constants_solver
   implicit none
 
   ! ----------------- crust, mantle and oceans ---------------------
@@ -436,13 +382,9 @@
     one_minus_sum_beta_crust_mantle, factor_scale_crust_mantle
   real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1,ATT2,ATT3,ATT4) :: &
     factor_common_crust_mantle
-!  real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
-!    R_memory_crust_mantle
   real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
     R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle,R_xz_crust_mantle,R_yz_crust_mantle
 
-!  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
-!    epsilondev_crust_mantle
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
     epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
     epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle
@@ -451,13 +393,9 @@
     eps_trace_over_3_crust_mantle
 
   ! ADJOINT
-!  real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT) :: &
-!    b_R_memory_crust_mantle
   real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT) :: &
     b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle,b_R_xz_crust_mantle,b_R_yz_crust_mantle
 
-!  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
-!    b_epsilondev_crust_mantle
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
     b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,b_epsilondev_xy_crust_mantle, &
     b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle
@@ -496,28 +434,12 @@
   integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_CM) :: nimin_crust_mantle,nimax_crust_mantle,nkmin_eta_crust_mantle
   integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_CM) :: njmin_crust_mantle,njmax_crust_mantle,nkmin_xi_crust_mantle
 
-  ! daniel: not sure why name ...5 and dimensions with one additional?
-  !real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: absorb_xmin_crust_mantle5, &
-  !  absorb_xmax_crust_mantle5, absorb_ymin_crust_mantle5, absorb_ymax_crust_mantle5
   real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: absorb_xmin_crust_mantle, &
     absorb_xmax_crust_mantle, absorb_ymin_crust_mantle, absorb_ymax_crust_mantle
 
   integer :: reclen_xmin_crust_mantle, reclen_xmax_crust_mantle, &
             reclen_ymin_crust_mantle,reclen_ymax_crust_mantle
 
-  ! assembly
-  integer :: npoin2D_faces_crust_mantle(NUMFACES_SHARED)
-  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle
-
-  ! indirect addressing for each corner of the chunks
-  integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
-
-  ! 2-D addressing and buffers for summation between slices
-  integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
-  integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
-
-  integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
-
   ! kernels
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
     rho_kl_crust_mantle,beta_kl_crust_mantle, alpha_kl_crust_mantle, Sigma_kl_crust_mantle
@@ -553,6 +475,10 @@
   integer :: nspec_inner_crust_mantle,nspec_outer_crust_mantle
   integer, dimension(:,:), allocatable :: phase_ispec_inner_crust_mantle
 
+  ! mesh coloring
+  integer :: num_colors_outer_crust_mantle,num_colors_inner_crust_mantle
+  integer,dimension(:),allocatable :: num_elem_colors_crust_mantle
+
 end module specfem_par_crustmantle
 
 !=====================================================================
@@ -561,7 +487,7 @@
 
 ! parameter module for elastic solver in inner core region
 
-  use constants
+  use constants_solver
   implicit none
 
   ! ----------------- inner core ---------------------
@@ -584,8 +510,6 @@
 
   ! local to global mapping
   integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
-  ! only needed for compute_boundary_kernel() routine
-  !logical, dimension(NSPEC_INNER_CORE) :: ispec_is_tiso_inner_core
 
   ! mass matrix
   real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: rmass_inner_core
@@ -602,13 +526,9 @@
     one_minus_sum_beta_inner_core, factor_scale_inner_core
   real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1,ATT2,ATT3,ATT5) :: &
     factor_common_inner_core
-!  real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
-!    R_memory_inner_core
   real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
     R_xx_inner_core,R_yy_inner_core,R_xy_inner_core,R_xz_inner_core,R_yz_inner_core
 
-!  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
-!    epsilondev_inner_core
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
     epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
     epsilondev_xz_inner_core,epsilondev_yz_inner_core
@@ -616,14 +536,9 @@
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY) :: &
     eps_trace_over_3_inner_core
   ! ADJOINT
-!  real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT) :: &
-!    b_R_memory_inner_core
   real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT) :: &
     b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core,b_R_xz_inner_core,b_R_yz_inner_core
 
-
-!  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
-!    b_epsilondev_inner_core
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
     b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
     b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core
@@ -632,34 +547,14 @@
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
     b_eps_trace_over_3_inner_core
 
-  ! assembly
-  ! for matching with central cube in inner core
-  integer, dimension(:), allocatable :: sender_from_slices_to_cube
-  integer, dimension(:,:), allocatable :: ibool_central_cube
-  double precision, dimension(:,:), allocatable :: buffer_slices,b_buffer_slices,buffer_slices2
-  double precision, dimension(:,:,:), allocatable :: buffer_all_cube_from_slices,b_buffer_all_cube_from_slices
-  integer nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,receiver_cube_from_slices
-
+  ! coupling/boundary surfaces
   integer :: nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
             nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
-
   integer, dimension(NSPEC2DMAX_XMIN_XMAX_IC) :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
   integer, dimension(NSPEC2DMAX_YMIN_YMAX_IC) :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
   integer, dimension(NSPEC2D_BOTTOM_IC) :: ibelm_bottom_inner_core
   integer, dimension(NSPEC2D_TOP_IC) :: ibelm_top_inner_core
 
-  integer :: npoin2D_faces_inner_core(NUMFACES_SHARED)
-  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_inner_core,npoin2D_eta_inner_core
-
-  ! indirect addressing for each corner of the chunks
-  integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
-
-  ! 2-D addressing and buffers for summation between slices
-  integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
-  integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
-
-  integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
-
   ! adjoint kernels
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
     rho_kl_inner_core,beta_kl_inner_core, alpha_kl_inner_core
@@ -673,6 +568,9 @@
   integer :: nspec_inner_inner_core,nspec_outer_inner_core
   integer, dimension(:,:), allocatable :: phase_ispec_inner_inner_core
 
+  ! mesh coloring
+  integer :: num_colors_outer_inner_core,num_colors_inner_inner_core
+  integer,dimension(:),allocatable :: num_elem_colors_inner_core
 
 end module specfem_par_innercore
 
@@ -682,7 +580,7 @@
 
 ! parameter module for acoustic solver in outer core region
 
-  use constants
+  use constants_solver
   implicit none
 
   ! ----------------- outer core ---------------------
@@ -700,11 +598,6 @@
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
     rhostore_outer_core,kappavstore_outer_core
 
-  ! local to global mapping
-  !integer, dimension(NSPEC_OUTER_CORE) :: idoubling_outer_core
-  ! only needed for compute_boundary_kernel()
-  !logical, dimension(NSPEC_OUTER_CORE) :: ispec_is_tiso_outer_core
-
   ! mass matrix
   real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: rmass_outer_core
 
@@ -751,19 +644,6 @@
   real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_OC) :: jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_OC) :: jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core
 
-  ! assembly
-  integer :: npoin2D_faces_outer_core(NUMFACES_SHARED)
-  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_outer_core,npoin2D_eta_outer_core
-
-  ! indirect addressing for each corner of the chunks
-  integer, dimension(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED) :: iboolcorner_outer_core
-
-  ! 2-D addressing and buffers for summation between slices
-  integer, dimension(NGLOB2DMAX_XMIN_XMAX_OC) :: iboolleft_xi_outer_core,iboolright_xi_outer_core
-  integer, dimension(NGLOB2DMAX_YMIN_YMAX_OC) :: iboolleft_eta_outer_core,iboolright_eta_outer_core
-
-  integer, dimension(NGLOB2DMAX_XY_VAL,NUMFACES_SHARED) :: iboolfaces_outer_core
-
   ! adjoint kernels
   real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: &
     rho_kl_outer_core,alpha_kl_outer_core
@@ -784,6 +664,9 @@
   integer :: nspec_inner_outer_core,nspec_outer_outer_core
   integer, dimension(:,:), allocatable :: phase_ispec_inner_outer_core
 
+  ! mesh coloring
+  integer :: num_colors_outer_outer_core,num_colors_inner_outer_core
+  integer,dimension(:),allocatable :: num_elem_colors_outer_core
 
 end module specfem_par_outercore
 
@@ -794,7 +677,7 @@
 
 ! parameter module for movies/shakemovies
 
-  use constants
+  use constants_solver
 
   implicit none
 

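Note on the mesh-coloring variables added to the parameter modules above (num_colors_outer_*/num_colors_inner_* plus the per-color element counts in num_elem_colors_*): such arrays are usually consumed by looping over one color at a time, so that all elements of a color share no global points and can be assembled concurrently. The sketch below is illustrative only and is not code from this commit; the subroutine name, the outer-colors-first ordering and the offset bookkeeping are assumptions.

  ! illustrative sketch only: typical consumption of per-color element counts
  ! such as num_elem_colors_crust_mantle. assumes colors are stored with the
  ! outer-phase colors first, then the inner-phase colors, and that elements
  ! are contiguous within each color after permutation.
  subroutine sketch_loop_over_colors(iphase,num_colors_outer,num_colors_inner, &
                                     num_elem_colors)
  implicit none
  integer, intent(in) :: iphase,num_colors_outer,num_colors_inner
  integer, dimension(num_colors_outer+num_colors_inner), intent(in) :: num_elem_colors
  integer :: icolor,icolor_min,icolor_max,istart,nb_elem

  if( iphase == 1 ) then
    ! outer-phase elements (overlapped with MPI communication)
    icolor_min = 1
    icolor_max = num_colors_outer
  else
    ! inner-phase elements
    icolor_min = num_colors_outer + 1
    icolor_max = num_colors_outer + num_colors_inner
  endif

  ! skip elements belonging to colors before this phase
  istart = 1
  do icolor = 1,icolor_min-1
    istart = istart + num_elem_colors(icolor)
  enddo

  do icolor = icolor_min,icolor_max
    nb_elem = num_elem_colors(icolor)
    ! elements istart .. istart+nb_elem-1 share no global points, so their
    ! contributions could be assembled concurrently (e.g. in a GPU kernel)
    ! ... element kernel would be launched here ...
    istart = istart + nb_elem
  enddo

  end subroutine sketch_loop_over_colors
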
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_output.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -35,10 +35,10 @@
   implicit none
 
   ! local parameters
-!daniel: debugging
+  ! debugging
   character(len=256) :: filename
-  !integer,dimension(:),allocatable :: dummy_i
-  logical, parameter :: SNAPSHOT_INNER_CORE = .true.
+  integer,dimension(:),allocatable :: dummy_i
+  logical, parameter :: DEBUG_SNAPSHOT = .false.
 
   ! save movie on surface
   if( MOVIE_SURFACE ) then
@@ -164,8 +164,8 @@
     endif
   endif ! MOVIE_VOLUME
 
-!daniel: debugging
-  if( SNAPSHOT_INNER_CORE ) then
+  ! debugging
+  if( DEBUG_SNAPSHOT ) then
     if( mod(it-MOVIE_START,NTSTEP_BETWEEN_FRAMES) == 0  &
       .and. it >= MOVIE_START .and. it <= MOVIE_STOP) then
 
@@ -177,31 +177,39 @@
 
       ! VTK file output
       ! displacement values
+
+      ! crust mantle
+      allocate(dummy_i(NSPEC_CRUST_MANTLE))
+      dummy_i(:) = IFLAG_CRUST
       ! one file per process
-      !write(prname,'(a,i6.6,a)') trim(LOCAL_TMP_PATH)//'/'//'proc',myrank,'_'
-      !write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_3_displ_',it
-      !call write_VTK_data_cr(idoubling_inner_core,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-      !                    xstore_inner_core,ystore_inner_core,zstore_inner_core,ibool_inner_core, &
-      !                    displ_inner_core,filename)
-      ! single file for all processes
-      ! crust mantle
-      !allocate(dummy_i(NSPEC_CRUST_MANTLE))
-      !dummy_i(:) = IFLAG_CRUST
+      write(prname,'(a,i6.6,a)') 'OUTPUT_FILES/snapshot_proc',myrank,'_'
+      write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_1_displ_',it
+      call write_VTK_data_cr(dummy_i,NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
+                          xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle, &
+                          displ_crust_mantle,filename)
+      ! single file for all
       !write(prname,'(a)') 'OUTPUT_FILES/snapshot_all_'
       !write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_1_displ_',it
-      !call write_VTK_data_cr_all(myrank,dummy_i, &
+      !call write_VTK_data_cr_all(myrank,NPROCTOT_VAL,dummy_i, &
       !                    NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
       !                    xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle, &
       !                    displ_crust_mantle,filename)
-      !deallocate(dummy_i)
+      deallocate(dummy_i)
 
       ! inner core
-      write(prname,'(a)') 'OUTPUT_FILES/snapshot_all_'
-      write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_3_displ_',it
-      call write_VTK_data_cr_all(myrank,idoubling_inner_core, &
-                          NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
-                          xstore_inner_core,ystore_inner_core,zstore_inner_core,ibool_inner_core, &
-                          displ_inner_core,filename)
+      ! one file per process
+      !write(prname,'(a,i6.6,a)') trim(LOCAL_TMP_PATH)//'/'//'proc',myrank,'_'
+      !write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_3_displ_',it
+      !call write_VTK_data_cr(idoubling_inner_core,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+      !                    xstore_inner_core,ystore_inner_core,zstore_inner_core,ibool_inner_core, &
+      !                    displ_inner_core,filename)
+      ! single file for all
+      !write(prname,'(a)') 'OUTPUT_FILES/snapshot_all_'
+      !write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_3_displ_',it
+      !call write_VTK_data_cr_all(myrank,NPROCTOT_VAL,idoubling_inner_core, &
+      !                    NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+      !                    xstore_inner_core,ystore_inner_core,zstore_inner_core,ibool_inner_core, &
+      !                    displ_inner_core,filename)
     endif
   endif
 

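The DEBUG_SNAPSHOT block above only fires on movie frames. For readability, the gating condition can be read as the small predicate sketched below (illustrative only; the helper name is hypothetical and does not exist in the code):

  ! illustrative sketch only: the time-step gate used above for writing a
  ! debug snapshot, factored into a standalone predicate
  logical function is_snapshot_step(it,MOVIE_START,MOVIE_STOP,NTSTEP_BETWEEN_FRAMES)
  implicit none
  integer, intent(in) :: it,MOVIE_START,MOVIE_STOP,NTSTEP_BETWEEN_FRAMES

  is_snapshot_step = ( mod(it-MOVIE_START,NTSTEP_BETWEEN_FRAMES) == 0 &
                       .and. it >= MOVIE_START .and. it <= MOVIE_STOP )

  end function is_snapshot_step
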
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_volume.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_volume.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_movie_volume.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -886,326 +886,3 @@
 
   end subroutine write_movie_volume_divcurl
 
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! external mesh routine for saving vtk files for custom_real values on global points
-
-  subroutine write_VTK_data_cr(idoubling,nspec,nglob, &
-                              xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-                              glob_data,prname_file)
-
-! outputs single file for each process
-
-  implicit none
-
-  include "constants.h"
-
-  integer :: nspec,nglob
-
-  integer, dimension(nspec):: idoubling
-
-  ! global coordinates
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
-
-  ! global data values array
-  real(kind=CUSTOM_REAL), dimension(NDIM,nglob) :: glob_data
-
-  ! file name
-  character(len=256) prname_file
-
-  ! local parameters
-  integer :: ispec,i
-  real(kind=CUSTOM_REAL) :: rval,thetaval,phival,xval,yval,zval
-
-  ! write source and receiver VTK files for Paraview
-  open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
-  write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
-  write(IOVTK,'(a)') 'material model VTK file'
-  write(IOVTK,'(a)') 'ASCII'
-  write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
-  write(IOVTK, '(a,i12,a)') 'POINTS ', nglob, ' float'
-  do i=1,nglob
-
-    !x,y,z store have been converted to r theta phi already, need to revert back for xyz output
-    rval = xstore_dummy(i)
-    thetaval = ystore_dummy(i)
-    phival = zstore_dummy(i)
-    call rthetaphi_2_xyz(xval,yval,zval,rval,thetaval,phival)
-
-    !write(IOVTK,'(3e18.6)') xstore_dummy(i),ystore_dummy(i),zstore_dummy(i)
-    write(IOVTK,'(3e18.6)') xval,yval,zval
-  enddo
-  write(IOVTK,*) ""
-
-  ! defines cell on coarse corner points
-  ! note: indices for vtk start at 0
-  write(IOVTK,'(a,i12,i12)') "CELLS ",nspec,nspec*9
-  do ispec=1,nspec
-
-    ! specific to inner core elements
-    ! exclude fictitious elements in central cube
-    if(idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then
-      ! valid cell
-      write(IOVTK,'(9i12)') 8,ibool(1,1,1,ispec)-1, &
-                          ibool(NGLLX,1,1,ispec)-1, &
-                          ibool(NGLLX,NGLLY,1,ispec)-1, &
-                          ibool(1,NGLLY,1,ispec)-1, &
-                          ibool(1,1,NGLLZ,ispec)-1, &
-                          ibool(NGLLX,1,NGLLZ,ispec)-1, &
-                          ibool(NGLLX,NGLLY,NGLLZ,ispec)-1, &
-                          ibool(1,NGLLY,NGLLZ,ispec)-1
-    else
-      ! fictitious elements in central cube
-      ! maps cell onto a randomly chosen point
-      write(IOVTK,'(9i12)') 8,ibool(1,1,1,1)-1, &
-                            ibool(1,1,1,1)-1, &
-                            ibool(1,1,1,1)-1, &
-                            ibool(1,1,1,1)-1, &
-                            ibool(1,1,1,1)-1, &
-                            ibool(1,1,1,1)-1, &
-                            ibool(1,1,1,1)-1, &
-                            ibool(1,1,1,1)-1
-    endif
-
-  enddo
-  write(IOVTK,*) ""
-
-  ! type: hexahedrons
-  write(IOVTK,'(a,i12)') "CELL_TYPES ",nspec
-  write(IOVTK,*) (12,ispec=1,nspec)
-  write(IOVTK,*) ""
-
-  ! x components
-  write(IOVTK,'(a,i12)') "POINT_DATA ",nglob
-  write(IOVTK,'(a)') "SCALARS x_comp float"
-  write(IOVTK,'(a)') "LOOKUP_TABLE default"
-  do i = 1,nglob
-      write(IOVTK,*) glob_data(1,i)
-  enddo
-  ! y components
-  write(IOVTK,'(a)') "SCALARS y_comp float"
-  write(IOVTK,'(a)') "LOOKUP_TABLE default"
-  do i = 1,nglob
-      write(IOVTK,*) glob_data(2,i)
-  enddo
-  ! z components
-  write(IOVTK,'(a)') "SCALARS z_comp float"
-  write(IOVTK,'(a)') "LOOKUP_TABLE default"
-  do i = 1,nglob
-      write(IOVTK,*) glob_data(3,i)
-  enddo
-  ! norm
-  write(IOVTK,'(a)') "SCALARS norm float"
-  write(IOVTK,'(a)') "LOOKUP_TABLE default"
-  do i = 1,nglob
-      write(IOVTK,*) sqrt( glob_data(1,i)*glob_data(1,i) &
-                        + glob_data(2,i)*glob_data(2,i) &
-                        + glob_data(3,i)*glob_data(3,i))
-  enddo
-  write(IOVTK,*) ""
-
-  close(IOVTK)
-
-
-  end subroutine write_VTK_data_cr
-
-!
-!-------------------------------------------------------------------------------------------------
-!
-
-! external mesh routine for saving vtk files for custom_real values on global points
-
-  subroutine write_VTK_data_cr_all(myrank,idoubling, &
-                              nspec,nglob, &
-                              xstore_dummy,ystore_dummy,zstore_dummy,ibool, &
-                              glob_data,prname_file)
-
-! outputs single file for all processes
-
-  implicit none
-
-  include 'mpif.h'
-  include "precision.h"
-  include "constants.h"
-  include "OUTPUT_FILES/values_from_mesher.h"
-
-  integer :: myrank,nspec,nglob
-
-  integer, dimension(nspec):: idoubling
-
-  ! global coordinates
-  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
-  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore_dummy,ystore_dummy,zstore_dummy
-
-  ! global data values array
-  real(kind=CUSTOM_REAL), dimension(NDIM,nglob) :: glob_data
-
-  ! file name
-  character(len=256) prname_file
-
-  ! local parameters
-  integer :: ispec,i,iproc,ier
-  real(kind=CUSTOM_REAL) :: rval,thetaval,phival,xval,yval,zval
-
-  real(kind=CUSTOM_REAL), dimension(:,:),allocatable :: &
-      store_val_x_all,store_val_y_all,store_val_z_all, &
-      store_val_ux_all,store_val_uy_all,store_val_uz_all
-  integer, dimension(:,:,:,:,:),allocatable :: ibool_all
-  integer, dimension(:,:),allocatable :: idoubling_all
-
-  ! master collect arrays
-  if( myrank == 0 ) then
-    allocate(store_val_x_all(nglob,0:NPROCTOT_VAL-1), &
-            store_val_y_all(nglob,0:NPROCTOT_VAL-1), &
-            store_val_z_all(nglob,0:NPROCTOT_VAL-1), &
-            store_val_ux_all(nglob,0:NPROCTOT_VAL-1), &
-            store_val_uy_all(nglob,0:NPROCTOT_VAL-1), &
-            store_val_uz_all(nglob,0:NPROCTOT_VAL-1), &
-            idoubling_all(nspec,0:NPROCTOT_VAL-1), &
-            ibool_all(NGLLX,NGLLY,NGLLZ,nspec,0:NPROCTOT_VAL-1),stat=ier)
-    if( ier /= 0 ) call exit_mpi(myrank,'error allocating stores')
-  else
-    ! dummy arrays
-    allocate(store_val_x_all(1,1), &
-            store_val_y_all(1,1), &
-            store_val_z_all(1,1), &
-            store_val_ux_all(1,1), &
-            store_val_uy_all(1,1), &
-            store_val_uz_all(1,1), &
-            idoubling_all(1,1), &
-            ibool_all(1,1,1,1,1),stat=ier)
-    if( ier /= 0 ) call exit_mpi(myrank,'error allocating dummy stores')
-  endif
-
-  ! gather info on master proc
-  call MPI_GATHER(xstore_dummy,nglob,CUSTOM_MPI_TYPE,store_val_x_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
-  call MPI_GATHER(ystore_dummy,nglob,CUSTOM_MPI_TYPE,store_val_y_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
-  call MPI_GATHER(zstore_dummy,nglob,CUSTOM_MPI_TYPE,store_val_z_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
-
-  call MPI_GATHER(glob_data(1,:),nglob,CUSTOM_MPI_TYPE,store_val_ux_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
-  call MPI_GATHER(glob_data(2,:),nglob,CUSTOM_MPI_TYPE,store_val_uy_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
-  call MPI_GATHER(glob_data(3,:),nglob,CUSTOM_MPI_TYPE,store_val_uz_all,nglob,CUSTOM_MPI_TYPE,0,MPI_COMM_WORLD,ier)
-
-  call MPI_GATHER(ibool,NGLLX*NGLLY*NGLLZ*nspec,MPI_INTEGER,ibool_all, &
-                  NGLLX*NGLLY*NGLLZ*nspec,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
-  call MPI_GATHER(idoubling,nspec,MPI_INTEGER,idoubling_all,nspec,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
-
-
-  if( myrank == 0 ) then
-
-    ! write source and receiver VTK files for Paraview
-    open(IOVTK,file=prname_file(1:len_trim(prname_file))//'.vtk',status='unknown')
-    write(IOVTK,'(a)') '# vtk DataFile Version 3.1'
-    write(IOVTK,'(a)') 'material model VTK file'
-    write(IOVTK,'(a)') 'ASCII'
-    write(IOVTK,'(a)') 'DATASET UNSTRUCTURED_GRID'
-    write(IOVTK, '(a,i12,a)') 'POINTS ', nglob*NPROCTOT_VAL, ' float'
-    do iproc=0, NPROCTOT_VAL-1
-      do i=1,nglob
-
-        !x,y,z store have been converted to r theta phi already, need to revert back for xyz output
-        rval = store_val_x_all(i,iproc)
-        thetaval = store_val_y_all(i,iproc)
-        phival = store_val_z_all(i,iproc)
-        call rthetaphi_2_xyz(xval,yval,zval,rval,thetaval,phival)
-
-        !write(IOVTK,'(3e18.6)') xstore_dummy(i),ystore_dummy(i),zstore_dummy(i)
-        write(IOVTK,'(3e18.6)') xval,yval,zval
-      enddo
-    enddo
-    write(IOVTK,*) ""
-
-    ! defines cell on coarse corner points
-    ! note: indices for vtk start at 0
-    write(IOVTK,'(a,i12,i12)') "CELLS ",nspec*NPROCTOT_VAL,nspec*NPROCTOT_VAL*9
-    do iproc=0, NPROCTOT_VAL-1
-      do ispec=1,nspec
-
-        ! note: central cube elements are only shared and used in CHUNK_AB and CHUNK_AB_ANTIPODE
-        !          all other chunks ignore those elements
-
-        ! specific to inner core elements
-        ! exclude fictitious elements in central cube
-        if(idoubling_all(ispec,iproc) /= IFLAG_IN_FICTITIOUS_CUBE) then
-          ! valid cell
-          ! cell corner ids
-          write(IOVTK,'(9i12)') 8,ibool_all(1,1,1,ispec,iproc)-1+iproc*nglob, &
-                            ibool_all(NGLLX,1,1,ispec,iproc)-1+iproc*nglob, &
-                            ibool_all(NGLLX,NGLLY,1,ispec,iproc)-1+iproc*nglob, &
-                            ibool_all(1,NGLLY,1,ispec,iproc)-1+iproc*nglob, &
-                            ibool_all(1,1,NGLLZ,ispec,iproc)-1+iproc*nglob, &
-                            ibool_all(NGLLX,1,NGLLZ,ispec,iproc)-1+iproc*nglob, &
-                            ibool_all(NGLLX,NGLLY,NGLLZ,ispec,iproc)-1+iproc*nglob, &
-                            ibool_all(1,NGLLY,NGLLZ,ispec,iproc)-1+iproc*nglob
-        else
-          ! fictitious elements in central cube
-          ! maps cell onto a randomly chosen point
-          write(IOVTK,'(9i12)') 8,ibool_all(1,1,1,1,iproc)-1, &
-                            ibool_all(1,1,1,1,iproc)-1, &
-                            ibool_all(1,1,1,1,iproc)-1, &
-                            ibool_all(1,1,1,1,iproc)-1, &
-                            ibool_all(1,1,1,1,iproc)-1, &
-                            ibool_all(1,1,1,1,iproc)-1, &
-                            ibool_all(1,1,1,1,iproc)-1, &
-                            ibool_all(1,1,1,1,iproc)-1
-        endif
-
-      enddo
-    enddo
-    write(IOVTK,*) ""
-
-    ! type: hexahedrons
-    write(IOVTK,'(a,i12)') "CELL_TYPES ",nspec*NPROCTOT_VAL
-    write(IOVTK,*) (12,ispec=1,nspec*NPROCTOT_VAL)
-    write(IOVTK,*) ""
-
-    ! x components
-    write(IOVTK,'(a,i12)') "POINT_DATA ",nglob*NPROCTOT_VAL
-    write(IOVTK,'(a)') "SCALARS x_comp float"
-    write(IOVTK,'(a)') "LOOKUP_TABLE default"
-    do iproc=0, NPROCTOT_VAL-1
-      do i = 1,nglob
-        write(IOVTK,*) store_val_ux_all(i,iproc)
-      enddo
-    enddo
-    ! y components
-    write(IOVTK,'(a)') "SCALARS y_comp float"
-    write(IOVTK,'(a)') "LOOKUP_TABLE default"
-    do iproc=0, NPROCTOT_VAL-1
-      do i = 1,nglob
-        write(IOVTK,*) store_val_uy_all(i,iproc)
-      enddo
-    enddo
-    ! z components
-    write(IOVTK,'(a)') "SCALARS z_comp float"
-    write(IOVTK,'(a)') "LOOKUP_TABLE default"
-    do iproc=0, NPROCTOT_VAL-1
-      do i = 1,nglob
-        write(IOVTK,*) store_val_uz_all(i,iproc)
-      enddo
-    enddo
-    ! norm
-    write(IOVTK,'(a)') "SCALARS norm float"
-    write(IOVTK,'(a)') "LOOKUP_TABLE default"
-    do iproc=0, NPROCTOT_VAL-1
-      do i = 1,nglob
-        write(IOVTK,*) sqrt( store_val_ux_all(i,iproc)**2 &
-                          + store_val_uy_all(i,iproc)**2 &
-                          + store_val_uz_all(i,iproc)**2 )
-      enddo
-    enddo
-    write(IOVTK,*) ""
-
-    close(IOVTK)
-
-  endif
-
-  deallocate(store_val_x_all,store_val_y_all,store_val_z_all, &
-            store_val_ux_all,store_val_uy_all,store_val_uz_all, &
-            ibool_all)
-
-  end subroutine write_VTK_data_cr_all

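The routines removed above wrote legacy ASCII VTK unstructured-grid files (POINTS, CELLS with zero-based corner ids, CELL_TYPES 12 for hexahedra, then POINT_DATA scalars). For reference, a minimal self-contained sketch of that file layout, for a single unit hexahedron with one point scalar, is given below; the program name, unit number and output file name are arbitrary choices for this example.

  ! illustrative sketch only: the legacy ASCII VTK layout produced by the
  ! removed write_VTK_data_cr routine, shown for a single unit hexahedron
  program sketch_vtk_hex
  implicit none
  integer, parameter :: IOUT = 98
  real, dimension(3,8) :: xyz
  integer :: i

  ! corner points of a unit cube, in VTK hexahedron ordering
  xyz = reshape( (/ 0.,0.,0.,  1.,0.,0.,  1.,1.,0.,  0.,1.,0., &
                    0.,0.,1.,  1.,0.,1.,  1.,1.,1.,  0.,1.,1. /), (/3,8/) )

  open(IOUT,file='sketch_hex.vtk',status='unknown')
  write(IOUT,'(a)') '# vtk DataFile Version 3.1'
  write(IOUT,'(a)') 'single hexahedron example'
  write(IOUT,'(a)') 'ASCII'
  write(IOUT,'(a)') 'DATASET UNSTRUCTURED_GRID'
  write(IOUT,'(a,i12,a)') 'POINTS ',8,' float'
  do i = 1,8
    write(IOUT,'(3e18.6)') xyz(1,i),xyz(2,i),xyz(3,i)
  enddo
  write(IOUT,*) ""
  ! one cell: leading corner count (8), then zero-based corner ids
  write(IOUT,'(a,i12,i12)') 'CELLS ',1,9
  write(IOUT,'(9i12)') 8,0,1,2,3,4,5,6,7
  write(IOUT,*) ""
  ! cell type 12 = VTK_HEXAHEDRON
  write(IOUT,'(a,i12)') 'CELL_TYPES ',1
  write(IOUT,*) 12
  write(IOUT,*) ""
  ! one scalar field defined on the points
  write(IOUT,'(a,i12)') 'POINT_DATA ',8
  write(IOUT,'(a)') 'SCALARS norm float'
  write(IOUT,'(a)') 'LOOKUP_TABLE default'
  do i = 1,8
    write(IOUT,*) sqrt( xyz(1,i)**2 + xyz(2,i)**2 + xyz(3,i)**2 )
  enddo
  close(IOUT)

  end program sketch_vtk_hex
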
Modified: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90	2012-04-16 17:59:37 UTC (rev 19947)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/write_seismograms.f90	2012-04-16 21:29:35 UTC (rev 19948)
@@ -123,7 +123,7 @@
 ! write seismograms to files
   subroutine write_seismograms_to_file()
 
-  use constants
+  use constants_solver
   use specfem_par,only: &
           NPROCTOT_VAL,myrank,nrec,nrec_local, &
           number_receiver_global,seismograms, &
@@ -335,7 +335,7 @@
 
   subroutine write_one_seismogram(one_seismogram,irec)
 
-  use constants
+  use constants_solver
   use specfem_par,only: &
           ANGULAR_WIDTH_XI_IN_DEGREES,NEX_XI, &
           myrank,nrec, &


