[cig-commits] r22470 - in seismo/3D/SPECFEM3D_GLOBE: branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D trunk/src/specfem3D

dkomati1 at geodynamics.org dkomati1 at geodynamics.org
Sun Jun 30 18:39:52 PDT 2013


Author: dkomati1
Date: 2013-06-30 18:39:52 -0700 (Sun, 30 Jun 2013)
New Revision: 22470

Added:
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector_block.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_noDev.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_noDev.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_noDev.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/create_central_cube_buffers.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_regular_points.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/rules.mk
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_regular_kernels.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_acoustic.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_elastic.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/finalize_simulation.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/iterate_time.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_arrays_solver_adios.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_attenuation_adios.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_forward_arrays_adios.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_mesh_databases_adios.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_topography_bathymetry.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/save_forward_arrays_adios.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_GLL_points.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/write_movie_output.f90
   seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/write_specfem_adios_header.F90
Removed:
   seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in
Log:
done merging new files in "specfem3D"


Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,56 @@
+#=====================================================================
+#
+#          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+#          --------------------------------------------------
+#
+#          Main authors: Dimitri Komatitsch and Jeroen Tromp
+#                        Princeton University, USA
+#             and University of Pau / CNRS / INRIA, France
+# (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+#                            April 2011
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#=====================================================================
+
+DIR = specfem3D
+
+# The rest of this file is generic
+#######################################
+
+####
+#### targets
+####
+
+default:
+	$(MAKE) -C ../.. $(DIR)
+
+all:
+	$(MAKE) -C ../.. all
+
+clean:
+	$(MAKE) -C ../.. CLEAN=$(DIR) clean
+
+cleanall:
+	$(MAKE) -C ../.. clean
+
+backup:
+	mkdir -p bak
+	cp *f90 *h Makefile bak
+
+bak: backup
+
+.PHONY: default all clean cleanall backup bak
+

Deleted: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in	2013-07-01 01:33:15 UTC (rev 22469)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/Makefile.in	2013-07-01 01:39:52 UTC (rev 22470)
@@ -1,462 +0,0 @@
-#=====================================================================
-#
-#          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
-#          --------------------------------------------------
-#
-#          Main authors: Dimitri Komatitsch and Jeroen Tromp
-#                        Princeton University, USA
-#             and University of Pau / CNRS / INRIA, France
-# (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
-#                            April 2011
-#
-# This program is free software; you can redistribute it and/or modify
-# it under the terms of the GNU General Public License as published by
-# the Free Software Foundation; either version 2 of the License, or
-# (at your option) any later version.
-#
-# This program is distributed in the hope that it will be useful,
-# but WITHOUT ANY WARRANTY; without even the implied warranty of
-# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-# GNU General Public License for more details.
-#
-# You should have received a copy of the GNU General Public License along
-# with this program; if not, write to the Free Software Foundation, Inc.,
-# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-#
-#=====================================================================
-
-# @configure_input@
-
-# CUDA
-# with configure: ./configure --with-cuda CUDA_LIB=.. CUDA_INC=.. MPI_INC=..
-
-# default cuda libraries
-# runtime library -lcudart needed, others are optional -lcuda -lcublas
-@COND_CUDA_TRUE@CUDA_LIBS = -lcudart
-@COND_CUDA_FALSE@CUDA_LIBS =
-
-CUDA_LIB_LOCATION = @CUDA_LIB@
-CUDA_LINK = $(CUDA_LIB_LOCATION) $(CUDA_LIBS)
-CUDA_INC = @CUDA_INC@ -I../../setup -I../../
-MPI_INC = @MPI_INC@
-
-@COND_CUDA_TRUE@NVCC = nvcc
-@COND_CUDA_FALSE@NVCC = @CC@
-
-# GPU architecture
-
-# CUDA architecture / code version
-# Fermi: -gencode=arch=compute_10,code=sm_10 not supported
-# Tesla (default): -gencode=arch=compute_20,code=sm_20
-# Geforce GT 650m: -gencode=arch=compute_30,code=sm_30
-# Kepler (cuda5) : -gencode=arch=compute_35,code=sm_35
-GENCODE_20 = -gencode=arch=compute_20,code=\"sm_20,compute_20\"
-GENCODE_30 = -gencode=arch=compute_30,code=\"sm_30,compute_30\"
-GENCODE_35 = -gencode=arch=compute_35,code=\"sm_35,compute_35\"
-
-# CUDA version 5.x
-@COND_CUDA_TRUE@@COND_CUDA5_TRUE@GENCODE = $(GENCODE_35)
-# CUDA version 4.x
-@COND_CUDA_TRUE@@COND_CUDA5_FALSE@GENCODE = $(GENCODE_20)
-
-# CUDA flags and linking
-@COND_CUDA_TRUE@NVCC_FLAGS_BASE = $(CUDA_INC) $(MPI_INC)
-@COND_CUDA_TRUE@@COND_CUDA5_TRUE@NVCC_FLAGS = $(NVCC_FLAGS_BASE) -dc -DCUDA $(GENCODE)
-@COND_CUDA_TRUE@@COND_CUDA5_FALSE@NVCC_FLAGS = $(NVCC_FLAGS_BASE) -DCUDA -DUSE_OLDER_CUDA4_GPU $(GENCODE)
-
-@COND_CUDA_TRUE@@COND_CUDA5_TRUE@NVCCLINK_BASE = $(NVCC) $(CUDA_INC) $(MPI_INC) -DCUDA
-@COND_CUDA_TRUE@@COND_CUDA5_TRUE@NVCCLINK = $(NVCCLINK_BASE) -dlink $(GENCODE)
-@COND_CUDA_TRUE@@COND_CUDA5_FALSE@NVCCLINK = $(NVCCLINK_BASE) -DUSE_OLDER_CUDA4_GPU $(GENCODE)
-
-@COND_CUDA_FALSE@NVCC_FLAGS = $(MPI_INC)
-@COND_CUDA_FALSE@NVCCLINK = $(NVCC) $(NVCC_FLAGS)
-
-
-FC = @FC@
-FCFLAGS = #@FCFLAGS@
-MPIFC = @MPIFC@
-MPILIBS = @MPILIBS@
-FLAGS_CHECK = @FLAGS_CHECK@
-FLAGS_NO_CHECK = @FLAGS_NO_CHECK@
-FCFLAGS_f90 = @FCFLAGS_f90@ -I../../setup -I../..
-
-FCCOMPILE_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_CHECK)
-FCCOMPILE_NO_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_NO_CHECK)
-MPIFCCOMPILE_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_CHECK)
-MPIFCCOMPILE_NO_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_NO_CHECK)
-
-FCLINK = $(MPIFCCOMPILE_NO_CHECK)
-
-CC = @CC@
-CFLAGS = @CFLAGS@
-CPPFLAGS = -I../../setup @CPPFLAGS@
-
-LDFLAGS = @LDFLAGS@
-
-#AR = ar
-#ARFLAGS = cru
-#RANLIB = ranlib
-AR = @AR@
-ARFLAGS = @ARFLAGS@
-RANLIB = @RANLIB@
-
-## compilation directories
-# E : executables directory
-E = ../../bin
-# O : objects directory
-O = ../../obj
-# SHARED : shared directoy
-SHARED = ../shared
-# S : source file directory
-S = .
-# root directory
-S_TOP = ../..
-## setup file directory
-SETUP = ../../setup
-## output file directory
-OUTPUT = ../../OUTPUT_FILES
-# CUDAD : cuda directory
-CUDAD = ../cuda
-
-#######################################
-
-libspecfem_a_OBJECTS_SOLVER = \
-	$O/assemble_MPI_scalar.mpicheckno.o \
-	$O/assemble_MPI_vector.mpicheckno.o \
-	$O/auto_ner.shared.o \
-	$O/broadcast_compute_parameters.sharedmpi.o \
-	$O/calendar.shared.o \
-	$O/comp_source_spectrum.check.o \
-	$O/comp_source_time_function.check.o \
-	$O/compute_adj_source_frechet.check.o \
-	$O/compute_arrays_source.check.o \
-	$O/convert_time.check.o \
-	$O/create_name_database.shared.o \
-	$O/count_elements.shared.o \
-	$O/count_number_of_sources.shared.o \
-	$O/count_points.shared.o \
-	$O/define_all_layers.shared.o \
-	$O/define_derivation_matrices.check.o \
-	$O/euler_angles.shared.o \
-	$O/force_ftz.cc.o \
-	$O/get_attenuation.check.o \
-	$O/get_backazimuth.check.o \
-	$O/get_cmt.check.o \
-	$O/get_event_info.mpicheck.o \
-	$O/get_model_parameters.shared.o \
-	$O/get_timestep_and_layers.shared.o \
-	$O/get_value_parameters.shared.o \
-	$O/gll_library.shared.o \
-	$O/hex_nodes.shared.o \
-	$O/intgrl.shared.o \
-	$O/lagrange_poly.shared.o \
-	$O/locate_receivers.mpicheck.o \
-	$O/locate_sources.mpicheck.o \
-	$O/make_ellipticity.shared.o \
-	$O/make_gravity.shared.o \
-	$O/model_prem.shared.o \
-	$O/model_topo_bathy.sharedmpi.o \
-	$O/multiply_arrays_source.check.o \
-	$O/param_reader.cc.o \
-	$O/spline_routines.shared.o \
-	$O/netlib_specfun_erf.check.o \
-	$O/read_compute_parameters.shared.o \
-	$O/read_parameter_file.shared.o \
-	$O/read_value_parameters.shared.o \
-	$O/recompute_jacobian.check.o \
-	$O/reduce.shared.o \
-	$O/rthetaphi_xyz.shared.o \
-	$O/write_c_binary.cc.o \
-	$O/write_seismograms.mpicheck.o \
-	$O/write_output_ASCII.mpicheck.o \
-	$O/write_output_SAC.mpicheck.o \
-	$O/write_VTK_file.sharedmpi.o \
-	$(EMPTY_MACRO)
-
-# solver objects with statically allocated arrays; dependent upon
-# values_from_mesher.h
-
-SOLVER_ARRAY_OBJECTS = \
-	$O/specfem3D_par.solver.o \
-	$O/check_simulation_stability.mpisolver.o \
-	$O/compute_add_sources.solver.o \
-	$O/compute_boundary_kernel.solvercheck.o \
-	$O/compute_coupling.solver.o \
-	$O/compute_element.solver.o \
-	$O/compute_forces_acoustic.solver.o \
-	$O/compute_forces_elastic.solver.o \
-	$O/compute_forces_crust_mantle.solver.o \
-	$O/compute_forces_crust_mantle_Dev.solver.o \
-	$O/compute_forces_inner_core.solver.o \
-	$O/compute_forces_inner_core_Dev.solver.o \
-	$O/compute_forces_outer_core.solver.o \
-	$O/compute_forces_outer_core_Dev.solver.o \
-	$O/compute_kernels.solver.o \
-	$O/compute_seismograms.solver.o \
-	$O/compute_stacey_crust_mantle.solver.o \
-	$O/compute_stacey_outer_core.solver.o \
-	$O/finalize_simulation.mpisolver.o \
-	$O/initialize_simulation.solver.o \
-	$O/iterate_time.mpisolver.o \
-	$O/noise_tomography.mpisolver.o \
-	$O/prepare_timerun.mpisolver.o \
-	$O/read_arrays_solver.solver.o \
-	$O/read_forward_arrays.solver.o \
-	$O/read_mesh_databases.mpisolver.o \
-	$O/read_topography_bathymetry.mpisolver.o \
-	$O/save_forward_arrays.solver.o \
-	$O/save_kernels.solver.o \
-	$O/setup_GLL_points.mpisolver.o \
-	$O/setup_sources_receivers.mpisolver.o \
-	$O/specfem3D.mpisolver.o \
-	$O/write_movie_output.mpisolver.o \
-	$O/write_movie_volume.mpisolver.o \
-	$O/write_movie_surface.mpisolver.o \
-	$(EMPTY_MACRO)
-
-CUDA_OBJECTS = \
-  $O/assemble_MPI_scalar_cuda.cuda.o \
-	$O/assemble_MPI_vector_cuda.cuda.o \
-  $O/check_fields_cuda.cuda.o \
-	$O/compute_add_sources_elastic_cuda.cuda.o \
-	$O/compute_coupling_cuda.cuda.o \
-	$O/compute_forces_crust_mantle_cuda.cuda.o \
-	$O/compute_forces_inner_core_cuda.cuda.o \
-	$O/compute_forces_outer_core_cuda.cuda.o \
-	$O/compute_kernels_cuda.cuda.o \
-	$O/compute_stacey_acoustic_cuda.cuda.o \
-	$O/compute_stacey_elastic_cuda.cuda.o \
-	$O/initialize_cuda.cuda.o \
-	$O/it_update_displacement_cuda.cuda.o \
-	$O/noise_tomography_cuda.cuda.o \
-	$O/prepare_mesh_constants_cuda.cuda.o \
-	$O/transfer_fields_cuda.cuda.o \
-	$O/write_seismograms_cuda.cuda.o \
-	$O/save_and_compare_cpu_vs_gpu.cudacc.o 
-
-CUDA_STUBS = \
-	$O/specfem3D_gpu_cuda_method_stubs.cudacc.o
-
-CUDA_DEVICE_OBJ = \
-        $O/cuda_device_obj.o \
-        $(EMPTY_MACRO)
-
-LIBSPECFEM_SOLVER = $O/libspecfem_solver.a
-
-# solver also depends on values from mesher
-@COND_CUDA_TRUE@XSPECFEM_OBJECTS_PRE = $(SOLVER_ARRAY_OBJECTS) $O/exit_mpi.sharedmpi.o $(LIBSPECFEM_SOLVER) $(CUDA_OBJECTS)
-@COND_CUDA_FALSE@XSPECFEM_OBJECTS_PRE = $(SOLVER_ARRAY_OBJECTS) $O/exit_mpi.sharedmpi.o $(LIBSPECFEM_SOLVER) $(CUDA_STUBS)
-
-# using ADIOS files
-ADIOS_OBJECTS= \
-	$O/read_attenuation_adios.adios.o \
-	$O/read_mesh_databases_adios.adios.o \
-	$O/read_arrays_solver_adios.adios.o \
-	$O/save_forward_arrays_adios.adios.o \
-	$O/read_forward_arrays_adios.adios.o \
-	$O/write_specfem_adios_header.adios.o \
-	$O/adios_manager.shared_adios.o  \
-	$O/adios_helpers.shared_adios.o
-ADIOS_STUBS = \
-	$O/adios_empty_stubs.noadios.o
-
-@COND_ADIOS_TRUE@XSPECFEM_OBJECTS_ADIOS = $(XSPECFEM_OBJECTS_PRE) $(ADIOS_OBJECTS)
-@COND_ADIOS_FALSE@XSPECFEM_OBJECTS_ADIOS = $(XSPECFEM_OBJECTS_PRE) $(ADIOS_STUBS)
-
-# ADIOS
-# with configure: ./configure --with-cuda ADIOS_LIB=.. ADIOS_INC=.. MPI_INC=..
-@COND_ADIOS_TRUE@ADIOS_LIBS = -ladiosf -lmxml
-@COND_ADIOS_FALSE@ADIOS_LIBS =
-
-ADIOS_LIB_LOCATION = @ADIOS_LIB@ @MXML_LIB@
-ADIOS_LINK = $(ADIOS_LIB_LOCATION) $(ADIOS_LIBS)
-ADIOS_INC = @ADIOS_INC@ -I../../setup -I../../ 
-
-#@COND_ADIOS_TRUE@CPPFLAGS = -I../../setup @CPPFLAGS@
-@COND_ADIOS_TRUE@MPIFCCOMPILE_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(ADIOS_INC) $(FLAGS_CHECK)
-@COND_ADIOS_TRUE@MPIFCCOMPILE_NO_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(ADIOS_INC) $(FLAGS_NO_CHECK)
-
-
-# vtk visualization
-VTK_OBJECTS = \
-	$O/visual_vtk.visualcc.o
-
-VTK_STUBS = \
-	$O/visual_vtk_stubs.visualc.o
-
-@COND_VTK_TRUE@XSPECFEM_OBJECTS = $(XSPECFEM_OBJECTS_ADIOS) $(VTK_OBJECTS)
-@COND_VTK_FALSE@XSPECFEM_OBJECTS = $(XSPECFEM_OBJECTS_ADIOS) $(VTK_STUBS)
-
-@COND_VTK_TRUE@CPPFLAGS = -I../../setup @CPPFLAGS@ -DWITH_VTK
-@COND_VTK_TRUE@FCCOMPILE_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_CHECK) -DWITH_VTK
-@COND_VTK_TRUE@FCCOMPILE_NO_CHECK =@FCENV@ ${FC} ${FCFLAGS} $(FLAGS_NO_CHECK) -DWITH_VTK
-@COND_VTK_TRUE@MPIFCCOMPILE_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_CHECK) -DWITH_VTK
-@COND_VTK_TRUE@MPIFCCOMPILE_NO_CHECK =@FCENV@ ${MPIFC} ${FCFLAGS} $(FLAGS_NO_CHECK) -DWITH_VTK
-
-#######################################
-
-####
-#### targets
-####
-
-# default targets
-DEFAULT = \
-	xspecfem3D \
-	$(EMPTY_MACRO)
-
-default: $(DEFAULT)
-
-all: clean default
-
-backup:
-	mkdir -p bak
-	cp *f90 *h Makefile bak
-
-bak: backup
-
-#######################################
-
-####
-#### rules for executables
-####
-
-xspecfem3D: $(XSPECFEM_OBJECTS)
-@COND_CUDA_TRUE@@COND_CUDA5_TRUE@	${NVCCLINK} -o $(CUDA_DEVICE_OBJ) $(CUDA_OBJECTS)
-@COND_CUDA_TRUE@@COND_CUDA5_TRUE@	${FCLINK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(CUDA_DEVICE_OBJ) $(MPILIBS) $(CUDA_LINK) $(ADIOS_LINK) $(LDFLAGS)
-@COND_CUDA_TRUE@@COND_CUDA5_FALSE@	${FCLINK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(MPILIBS) $(CUDA_LINK) $(ADIOS_LINK) $(LDFLAGS)
-@COND_CUDA_FALSE@	${FCLINK} -o ${E}/xspecfem3D $(XSPECFEM_OBJECTS) $(MPILIBS) $(CUDA_LINK) $(ADIOS_LINK) $(LDFLAGS)
-
-reqheader:
-	(cd ../create_header_file; make)
-
-
-clean:
-	rm -f $O/* *.o work.pc* *.mod ${E}/xspecfem3D  \
-      PI*
-
-#######################################
-
-###
-### rule for the archive library
-###
-
-$O/libspecfem_solver.a: $(libspecfem_a_OBJECTS_SOLVER)
-	-rm -f $O/libspecfem_solver.a
-	$(AR) $(ARFLAGS) $O/libspecfem_solver.a $(libspecfem_a_OBJECTS_SOLVER)
-	$(RANLIB) $O/libspecfem_solver.a
-
-
-#######################################
-
-####
-#### rule for each .o file below
-####
-
-##
-## shared
-##
-$O/%.shared.o: ${SHARED}/%.f90 ${SETUP}/constants.h
-	${FCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-
-$O/%.sharedmpi.o: ${SHARED}/%.f90 ${SETUP}/constants.h
-	${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-
-$O/%.cc.o: ${SHARED}/%.c ${SETUP}/config.h
-	${CC} -c $(CPPFLAGS) $(CFLAGS) -o $@ $< 
-
-#######################################
-
-###
-### specfem3D - optimized flags and dependence on values from mesher here
-###
-$O/%.solver.o: $S/%.f90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
-	${FCCOMPILE_NO_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-
-$O/%.solver.o: $S/%.F90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
-	${FCCOMPILE_NO_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-
-$O/%.solvercheck.o: $S/%.f90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
-	${FCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-
-$O/%.mpisolver.o: $S/%.f90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
-	${MPIFCCOMPILE_NO_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-
-$O/%.mpisolver.o: $S/%.F90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
-	${MPIFCCOMPILE_NO_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-
-$O/%.mpisolvercheck.o: $S/%.f90 ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h
-	${MPIFCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-
-#######################################
-
-##
-## specfem3D - non-dependent on values from mesher here
-##
-$O/%.check.o: $S/%.f90 ${SETUP}/constants.h
-	${FCCOMPILE_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-
-$O/%.mpicheckno.o: $S/%.f90 ${SETUP}/constants.h
-	${MPIFCCOMPILE_NO_CHECK} ${FCFLAGS_f90} -c -o $@ $<
-
-$O/%.mpicheck.o: %.f90 ${SETUP}/constants.h
-	${MPIFCCOMPILE_CHECK} -c -o $@ ${FCFLAGS_f90} $<
-
-
-#######################################
-
-###
-### CUDA compilation
-###
-
-$O/%.cuda.o: ${CUDAD}/%.cu ${SETUP}/config.h ${CUDAD}/mesh_constants_cuda.h ${CUDAD}/prepare_constants_cuda.h
-	${NVCC} -c $< -o $@ $(NVCC_FLAGS)
-
-$O/%.cudacc.o: ${CUDAD}/%.c ${SETUP}/config.h
-	${CC} -c $(CPPFLAGS) $(CFLAGS) $(MPI_INC) -o $@ ${CUDAD}/$< 
-
-#######################################
-
-###
-### ADIOS compilation
-###
-
-$O/%.adios.o: %.F90 ${SETUP}/config.h
-	${MPIFC} -c $(ADIOS_INC) $(FCFLAGS) $(MPI_INC) -o $@ $<
-
-$O/%.adios.o: %.f90 ${SETUP}/config.h
-	${MPIFC} -c $(ADIOS_INC) $(FCFLAGS) $(MPI_INC) -o $@ $<
-
-$O/%.noadios.o: %.F90 
-	${FC} -c -o $@ $<
-
-$O/%.noadios.o: %.f90 
-	${FC} -c -o $@ $<
-
-$O/%.shared_adios.o: ${SHARED}/%.f90 ${SETUP}/constants.h
-	${MPIFC} -c $(ADIOS_INC) $(FCFLAGS) $(MPI_INC) -o $@ $<
-
-$O/%.shared_adios.o: ${SHARED}/%.F90 ${SETUP}/constants.h
-	${MPIFC} -c $(ADIOS_INC) $(FCFLAGS) $(MPI_INC) -o $@ $<
-#######################################
-
-###
-### VTK compilation
-###
-
-$O/%.visualcc.o: %.cpp ${SETUP}/config.h
-	${CC} -c $(CPPFLAGS) $(MPI_INC) -o $@ $<
-
-$O/%.visualc.o: %.c ${SETUP}/config.h
-	${CC} -c $(CPPFLAGS) $(MPI_INC) -o $@ $<
-
-
-
-#######################################
-
-###
-### rule for the header file
-###
-
-${OUTPUT}/values_from_mesher.h: reqheader
-	(mkdir -p ${OUTPUT}; cd ${S_TOP}/; ./bin/xcreate_header_file)
-

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,328 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+    npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+    receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
+    ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,vector_assemble,ndim_assemble,iphase_CC)
+
+  implicit none
+
+! standard include of the MPI library
+  include 'mpif.h'
+  include 'constants.h'
+
+! include values created by the mesher
+  include "OUTPUT_FILES/values_from_mesher.h"
+
+! for matching with central cube in inner core
+  integer, intent(in) :: ichunk, nb_msgs_theor_in_cube, npoin2D_cube_from_slices
+  integer, intent(in) :: ndim_assemble
+  integer, intent(in) :: receiver_cube_from_slices
+  integer, intent(inout) :: iphase_CC
+  integer, dimension(nb_msgs_theor_in_cube), intent(in) :: sender_from_slices_to_cube
+  double precision, dimension(npoin2D_cube_from_slices,ndim_assemble), intent(inout) :: buffer_slices
+  double precision, dimension(npoin2D_cube_from_slices,ndim_assemble,nb_msgs_theor_in_cube), intent(inout) :: &
+                                                                                       buffer_all_cube_from_slices
+  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices), intent(in) :: ibool_central_cube
+
+! local to global mapping
+  integer, intent(in) :: NSPEC2D_BOTTOM_INNER_CORE
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE), intent(in) :: ibool_inner_core
+  integer, dimension(NSPEC_INNER_CORE), intent(in) :: idoubling_inner_core
+  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE), intent(in) :: ibelm_bottom_inner_core
+
+! vector
+  real(kind=CUSTOM_REAL), dimension(ndim_assemble,NGLOB_INNER_CORE), intent(inout) :: vector_assemble
+
+  integer ipoin,idimension, ispec2D, ispec
+  integer i,j,k
+  integer sender,receiver,imsg
+
+  real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: array_central_cube
+
+! MPI status of messages to be received
+  integer, save :: request_send,request_receive
+! maximum value of nb_msgs_theor_in_cube is 5 (when NPROC_XI == 1)
+! therefore NPROC_XI+4 is always large enough
+  integer, dimension(NPROC_XI_VAL+4), save :: request_send_array,request_receive_array
+  logical :: flag_result_test
+  integer, dimension(MPI_STATUS_SIZE) :: msg_status
+  integer :: ier
+
+! mask
+  logical, dimension(NGLOB_INNER_CORE) :: mask
+
+!---
+!---  use buffers to assemble mass matrix with central cube once and for all
+!---
+
+  if(iphase_CC == 1) then
+
+! on chunks AB and AB_ANTIPODE, receive all the messages from slices
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+    do imsg = 1,nb_msgs_theor_in_cube-1
+! receive buffers from slices
+      sender = sender_from_slices_to_cube(imsg)
+      call MPI_IRECV(buffer_all_cube_from_slices(1,1,imsg), &
+                ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+                itag,MPI_COMM_WORLD,request_receive_array(imsg),ier)
+    enddo
+  endif
+
+! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+! for bottom elements in contact with central cube from the slices side
+    ipoin = 0
+    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+      ispec = ibelm_bottom_inner_core(ispec2D)
+! only for DOFs exactly on surface of central cube (bottom of these elements)
+      k = 1
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ipoin = ipoin + 1
+          buffer_slices(ipoin,:) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
+        enddo
+      enddo
+    enddo
+! send buffer to central cube
+    receiver = receiver_cube_from_slices
+    call MPI_ISEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
+              MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,request_send,ier)
+ endif  ! end sending info to central cube
+
+  iphase_CC = iphase_CC + 1
+  return ! exit because we have started some communications therefore we need some time
+
+  endif !!!!!!!!! end of iphase_CC 1
+
+  if(iphase_CC == 2) then
+
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+    if(.not. flag_result_test) return ! exit if message not sent yet
+  endif
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+    do imsg = 1,nb_msgs_theor_in_cube-1
+      call MPI_TEST(request_receive_array(imsg),flag_result_test,msg_status,ier)
+      if(.not. flag_result_test) return ! exit if message not received yet
+    enddo
+  endif
+
+! exchange of their bottom faces between chunks AB and AB_ANTIPODE
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+    ipoin = 0
+    do ispec = NSPEC_INNER_CORE, 1, -1
+      if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
+        k = 1
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ipoin = ipoin + 1
+            buffer_slices(ipoin,:) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
+          enddo
+        enddo
+      endif
+    enddo
+    sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
+!   call MPI_SENDRECV(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
+!       itag,buffer_slices2,ndim_assemble*npoin2D_cube_from_slices,&
+!       MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+
+    call MPI_IRECV(buffer_all_cube_from_slices(1,1,nb_msgs_theor_in_cube), &
+        ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,request_receive,ier)
+!! DK DK this merged with previous statement
+!   buffer_all_cube_from_slices(:,:,nb_msgs_theor_in_cube) = buffer_slices2(:,:)
+
+    call MPI_ISEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
+        itag,MPI_COMM_WORLD,request_send,ier)
+  endif
+
+  iphase_CC = iphase_CC + 1
+  return ! exit because we have started some communications therefore we need some time
+
+  endif !!!!!!!!! end of iphase_CC 2
+
+  if(iphase_CC == 3) then
+
+!--- now we need to assemble the contributions
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+    call MPI_TEST(request_send,flag_result_test,msg_status,ier)
+    if(.not. flag_result_test) return ! exit if message not sent yet
+    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+    if(.not. flag_result_test) return ! exit if message not received yet
+
+    do idimension = 1,ndim_assemble
+! erase contributions to central cube array
+      array_central_cube(:) = 0._CUSTOM_REAL
+
+! use indirect addressing to store contributions only once
+! distinguish between single and double precision for reals
+      do imsg = 1,nb_msgs_theor_in_cube-1
+        do ipoin = 1,npoin2D_cube_from_slices
+          if(CUSTOM_REAL == SIZE_REAL) then
+            array_central_cube(ibool_central_cube(imsg,ipoin)) = sngl(buffer_all_cube_from_slices(ipoin,idimension,imsg))
+          else
+            array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(ipoin,idimension,imsg)
+          endif
+        enddo
+      enddo
+! add the contribution of AB or AB_ANTIPODE to sum with the external slices on the edges
+! use a mask to avoid taking the same point into account several times.
+      mask(:) = .false.
+      do ipoin = 1,npoin2D_cube_from_slices
+        if (.not. mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin))) then
+          if(CUSTOM_REAL == SIZE_REAL) then
+            array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
+            array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
+            sngl(buffer_all_cube_from_slices(ipoin,idimension,nb_msgs_theor_in_cube))
+          else
+            array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
+            array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
+            buffer_all_cube_from_slices(ipoin,idimension,nb_msgs_theor_in_cube)
+          endif
+          mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = .true.
+        endif
+      enddo
+
+! suppress degrees of freedom already assembled at top of cube on edges
+      do ispec = 1,NSPEC_INNER_CORE
+        if(idoubling_inner_core(ispec) == IFLAG_TOP_CENTRAL_CUBE) then
+          k = NGLLZ
+          do j = 1,NGLLY
+            do i = 1,NGLLX
+              array_central_cube(ibool_inner_core(i,j,k,ispec)) = 0._CUSTOM_REAL
+            enddo
+          enddo
+        endif
+      enddo
+
+! assemble contributions
+      vector_assemble(idimension,:) = vector_assemble(idimension,:) + array_central_cube(:)
+
+! copy sum back
+      do imsg = 1,nb_msgs_theor_in_cube-1
+        do ipoin = 1,npoin2D_cube_from_slices
+          buffer_all_cube_from_slices(ipoin,idimension,imsg) = vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
+        enddo
+      enddo
+
+    enddo
+
+  endif
+
+!----------
+
+! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+! receive buffers from slices
+  sender = receiver_cube_from_slices
+  call MPI_IRECV(buffer_slices, &
+              ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+              itag,MPI_COMM_WORLD,request_receive,ier)
+! for bottom elements in contact with central cube from the slices side
+!   ipoin = 0
+!   do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+!     ispec = ibelm_bottom_inner_core(ispec2D)
+! only for DOFs exactly on surface of central cube (bottom of these elements)
+!     k = 1
+!     do j = 1,NGLLY
+!       do i = 1,NGLLX
+!         ipoin = ipoin + 1
+! distinguish between single and double precision for reals
+!         if(CUSTOM_REAL == SIZE_REAL) then
+!           vector_assemble(:,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,:))
+!         else
+!           vector_assemble(:,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,:)
+!         endif
+!       enddo
+!     enddo
+!   enddo
+ endif  ! end receiving info from central cube
+
+!------- send info back from central cube to slices
+
+! on chunk AB & CHUNK_AB_ANTIPODE, send all the messages to slices
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+    do imsg = 1,nb_msgs_theor_in_cube-1
+! send buffers to slices
+      receiver = sender_from_slices_to_cube(imsg)
+      call MPI_ISEND(buffer_all_cube_from_slices(1,1,imsg),ndim_assemble*npoin2D_cube_from_slices, &
+              MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,request_send_array(imsg),ier)
+    enddo
+  endif
+
+  iphase_CC = iphase_CC + 1
+  return ! exit because we have started some communications therefore we need some time
+
+  endif !!!!!!!!! end of iphase_CC 3
+
+  if(iphase_CC == 4) then
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+    do imsg = 1,nb_msgs_theor_in_cube-1
+      call MPI_TEST(request_send_array(imsg),flag_result_test,msg_status,ier)
+      if(.not. flag_result_test) return ! exit if message not sent yet
+    enddo
+  endif
+
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+    call MPI_TEST(request_receive,flag_result_test,msg_status,ier)
+    if(.not. flag_result_test) return ! exit if message not received yet
+  endif
+
+! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+! for bottom elements in contact with central cube from the slices side
+    ipoin = 0
+    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+      ispec = ibelm_bottom_inner_core(ispec2D)
+! only for DOFs exactly on surface of central cube (bottom of these elements)
+      k = 1
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ipoin = ipoin + 1
+! distinguish between single and double precision for reals
+          if(CUSTOM_REAL == SIZE_REAL) then
+            vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,:))
+          else
+            vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,:)
+          endif
+        enddo
+      enddo
+    enddo
+ endif  ! end receiving info from central cube
+
+! this is the exit condition, to go beyond the last phase number
+  iphase_CC = iphase_CC + 1
+
+  endif !!!!!!!!! end of iphase_CC 4
+
+  end subroutine assemble_MPI_central_cube
+
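
The routine above is deliberately split into phases driven by the iphase_CC counter: each call either posts non-blocking MPI_ISEND/MPI_IRECV requests or polls them with MPI_TEST, then increments iphase_CC and returns, so the caller can overlap computation with the communication still in flight; the assembly is finished only once iphase_CC has moved past phase 4. As a minimal, self-contained sketch of that calling pattern (illustrative only, not part of this commit: the two-rank setup, the buffer size and names such as advance_phase are assumptions made for the example), a driver could look like this:

  program phase_driver_sketch

  ! toy version of the phase-counter scheme used above: post non-blocking calls,
  ! return immediately, and keep re-entering the routine until the counter has
  ! moved past the last phase

    implicit none
    include 'mpif.h'

    integer, parameter :: n = 4, itag = 100
    integer :: myrank, nproc, ier, iphase, partner
    integer :: request_send, request_receive
    double precision :: sendbuf(n), recvbuf(n)

    call MPI_INIT(ier)
    call MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ier)
    call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ier)

    if (nproc == 2) then
      partner = 1 - myrank
      sendbuf(:) = dble(myrank)
      iphase = 1
      do while (iphase <= 2)
        call advance_phase(iphase, partner, sendbuf, recvbuf, request_send, request_receive)
        ! ... a real solver would overlap useful computation here ...
      enddo
    endif

    call MPI_FINALIZE(ier)

  contains

    subroutine advance_phase(iphase, partner, sendbuf, recvbuf, request_send, request_receive)

    integer, intent(inout) :: iphase, request_send, request_receive
    integer, intent(in) :: partner
    double precision, intent(in) :: sendbuf(n)
    double precision, intent(inout) :: recvbuf(n)

    integer :: ier
    integer :: msg_status(MPI_STATUS_SIZE)
    logical :: flag_result_test

    if (iphase == 1) then
! phase 1: start the communications and return right away
      call MPI_IRECV(recvbuf, n, MPI_DOUBLE_PRECISION, partner, itag, MPI_COMM_WORLD, request_receive, ier)
      call MPI_ISEND(sendbuf, n, MPI_DOUBLE_PRECISION, partner, itag, MPI_COMM_WORLD, request_send, ier)
      iphase = iphase + 1
      return
    endif

    if (iphase == 2) then
! phase 2: advance only once both messages have completed
      call MPI_TEST(request_send, flag_result_test, msg_status, ier)
      if (.not. flag_result_test) return ! exit if message not sent yet
      call MPI_TEST(request_receive, flag_result_test, msg_status, ier)
      if (.not. flag_result_test) return ! exit if message not received yet
      iphase = iphase + 1 ! past the last phase: the assembly itself would happen here
    endif

    end subroutine advance_phase

  end program phase_driver_sketch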

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_central_cube_block.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,263 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+subroutine assemble_MPI_central_cube_block(ichunk,nb_msgs_theor_in_cube, sender_from_slices_to_cube, &
+  npoin2D_cube_from_slices, buffer_all_cube_from_slices, buffer_slices, buffer_slices2, ibool_central_cube, &
+  receiver_cube_from_slices, ibool_inner_core, idoubling_inner_core, NSPEC_INNER_CORE, &
+  ibelm_bottom_inner_core, NSPEC2D_BOTTOM_INNER_CORE,NGLOB_INNER_CORE,vector_assemble,ndim_assemble)
+
+! this version of the routine is based on blocking MPI calls
+
+  implicit none
+
+! standard include of the MPI library
+  include 'mpif.h'
+  include 'constants.h'
+
+! for matching with central cube in inner core
+  integer ichunk, nb_msgs_theor_in_cube, npoin2D_cube_from_slices
+  integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
+  double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices,buffer_slices2
+  double precision, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM) :: buffer_all_cube_from_slices
+  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
+  integer receiver_cube_from_slices
+
+! local to global mapping
+  integer NSPEC_INNER_CORE,NSPEC2D_BOTTOM_INNER_CORE, NGLOB_INNER_CORE
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
+  integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
+  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+
+! vector
+  integer ndim_assemble
+  real(kind=CUSTOM_REAL), dimension(ndim_assemble,NGLOB_INNER_CORE) :: vector_assemble
+
+  integer ipoin,idimension, ispec2D, ispec
+  integer i,j,k
+  integer sender,receiver,imsg
+
+  real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: array_central_cube
+
+! MPI status of messages to be received
+  integer msg_status(MPI_STATUS_SIZE), ier
+
+! mask
+  logical, dimension(NGLOB_INNER_CORE) :: mask
+
+!---
+!---  now use buffers to assemble mass matrix with central cube once and for all
+!---
+
+! on chunks AB and AB_ANTIPODE, receive all the messages from slices
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+    do imsg = 1,nb_msgs_theor_in_cube-1
+
+! receive buffers from slices
+    sender = sender_from_slices_to_cube(imsg)
+    call MPI_RECV(buffer_slices, &
+                ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+                itag,MPI_COMM_WORLD,msg_status,ier)
+
+! copy buffer in 2D array for each slice
+    buffer_all_cube_from_slices(imsg,:,1:ndim_assemble) = buffer_slices(:,1:ndim_assemble)
+
+    enddo
+  endif
+
+! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+
+! for bottom elements in contact with central cube from the slices side
+    ipoin = 0
+    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+
+      ispec = ibelm_bottom_inner_core(ispec2D)
+
+! only for DOFs exactly on surface of central cube (bottom of these elements)
+      k = 1
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ipoin = ipoin + 1
+          buffer_slices(ipoin,1:ndim_assemble) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
+        enddo
+      enddo
+    enddo
+
+! send buffer to central cube
+    receiver = receiver_cube_from_slices
+    call MPI_SEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
+              MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
+
+ endif  ! end sending info to central cube
+
+
+! exchange of their bottom faces between chunks AB and AB_ANTIPODE
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+    ipoin = 0
+    do ispec = NSPEC_INNER_CORE, 1, -1
+      if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
+        k = 1
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ipoin = ipoin + 1
+            buffer_slices(ipoin,1:ndim_assemble) = dble(vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)))
+          enddo
+        enddo
+      endif
+    enddo
+
+    sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
+
+    call MPI_SENDRECV(buffer_slices,ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
+        itag,buffer_slices2,ndim_assemble*npoin2D_cube_from_slices,&
+        MPI_DOUBLE_PRECISION,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+
+   buffer_all_cube_from_slices(nb_msgs_theor_in_cube,:,1:ndim_assemble) = buffer_slices2(:,1:ndim_assemble)
+
+  endif
+
+!--- now we need to assemble the contributions
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+    do idimension = 1,ndim_assemble
+! erase contributions to central cube array
+      array_central_cube(:) = 0._CUSTOM_REAL
+
+! use indirect addressing to store contributions only once
+! distinguish between single and double precision for reals
+      do imsg = 1,nb_msgs_theor_in_cube-1
+        do ipoin = 1,npoin2D_cube_from_slices
+          if(CUSTOM_REAL == SIZE_REAL) then
+            array_central_cube(ibool_central_cube(imsg,ipoin)) = sngl(buffer_all_cube_from_slices(imsg,ipoin,idimension))
+          else
+            array_central_cube(ibool_central_cube(imsg,ipoin)) = buffer_all_cube_from_slices(imsg,ipoin,idimension)
+          endif
+        enddo
+      enddo
+! add the contribution of AB or AB_ANTIPODE to sum with the external slices on the edges
+! use a mask to avoid taking the same point into account several times.
+      mask(:) = .false.
+      do ipoin = 1,npoin2D_cube_from_slices
+        if (.not. mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin))) then
+          if(CUSTOM_REAL == SIZE_REAL) then
+            array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
+            array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
+            sngl(buffer_all_cube_from_slices(nb_msgs_theor_in_cube,ipoin,idimension))
+          else
+            array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = &
+            array_central_cube(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) + &
+            buffer_all_cube_from_slices(nb_msgs_theor_in_cube,ipoin,idimension)
+          endif
+          mask(ibool_central_cube(nb_msgs_theor_in_cube,ipoin)) = .true.
+        endif
+      enddo
+
+! suppress degrees of freedom already assembled at top of cube on edges
+      do ispec = 1,NSPEC_INNER_CORE
+        if(idoubling_inner_core(ispec) == IFLAG_TOP_CENTRAL_CUBE) then
+          k = NGLLZ
+          do j = 1,NGLLY
+            do i = 1,NGLLX
+              array_central_cube(ibool_inner_core(i,j,k,ispec)) = 0._CUSTOM_REAL
+            enddo
+          enddo
+        endif
+      enddo
+
+! assemble contributions
+      vector_assemble(idimension,:) = vector_assemble(idimension,:) + array_central_cube(:)
+
+! copy sum back
+      do imsg = 1,nb_msgs_theor_in_cube-1
+        do ipoin = 1,npoin2D_cube_from_slices
+          buffer_all_cube_from_slices(imsg,ipoin,idimension) = vector_assemble(idimension,ibool_central_cube(imsg,ipoin))
+        enddo
+      enddo
+
+    enddo
+
+  endif
+
+!----------
+
+! receive info from central cube on all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+
+! receive buffers from slices
+  sender = receiver_cube_from_slices
+  call MPI_RECV(buffer_slices, &
+              ndim_assemble*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+              itag,MPI_COMM_WORLD,msg_status,ier)
+
+! for bottom elements in contact with central cube from the slices side
+    ipoin = 0
+    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+
+      ispec = ibelm_bottom_inner_core(ispec2D)
+
+! only for DOFs exactly on surface of central cube (bottom of these elements)
+      k = 1
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ipoin = ipoin + 1
+
+! distinguish between single and double precision for reals
+          if(CUSTOM_REAL == SIZE_REAL) then
+            vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = sngl(buffer_slices(ipoin,1:ndim_assemble))
+          else
+            vector_assemble(1:ndim_assemble,ibool_inner_core(i,j,k,ispec)) = buffer_slices(ipoin,1:ndim_assemble)
+          endif
+
+        enddo
+      enddo
+    enddo
+
+ endif  ! end receiving info from central cube
+
+!------- send info back from central cube to slices
+
+! on chunk AB & CHUNK_AB_ANTIPODE, send all the messages to slices
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+   do imsg = 1,nb_msgs_theor_in_cube-1
+
+! copy buffer in 2D array for each slice
+   buffer_slices(:,1:ndim_assemble) = buffer_all_cube_from_slices(imsg,:,1:ndim_assemble)
+
+! send buffers to slices
+    receiver = sender_from_slices_to_cube(imsg)
+    call MPI_SEND(buffer_slices,ndim_assemble*npoin2D_cube_from_slices, &
+              MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
+
+   enddo
+   endif
+
+end subroutine assemble_MPI_central_cube_block
+
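
Unlike the phased routine above, this blocking variant completes the whole assembly inside a single call: the cube chunks receive with plain MPI_RECV, the other slices send with MPI_SEND, and the bottom-face swap between CHUNK_AB and CHUNK_AB_ANTIPODE uses one MPI_SENDRECV, which sends and receives in the same call and therefore cannot deadlock even though both partners post it at the same time. A minimal, self-contained sketch of that paired exchange (illustrative only, not part of this commit; the two-rank setup and names are assumptions made for the example):

  program sendrecv_exchange_sketch

  ! two ranks swap a small buffer with a single blocking MPI_SENDRECV,
  ! mirroring the AB / AB_ANTIPODE bottom-face exchange above

    implicit none
    include 'mpif.h'

    integer, parameter :: n = 4, itag = 100
    integer :: myrank, nproc, ier, partner
    integer :: msg_status(MPI_STATUS_SIZE)
    double precision :: buffer_out(n), buffer_in(n)

    call MPI_INIT(ier)
    call MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ier)
    call MPI_COMM_SIZE(MPI_COMM_WORLD, nproc, ier)

    if (nproc == 2) then
      partner = 1 - myrank
      buffer_out(:) = dble(myrank)

! send our face to the partner and receive its face in one call
      call MPI_SENDRECV(buffer_out, n, MPI_DOUBLE_PRECISION, partner, itag, &
                        buffer_in,  n, MPI_DOUBLE_PRECISION, partner, itag, &
                        MPI_COMM_WORLD, msg_status, ier)
    endif

    call MPI_FINALIZE(ier)

  end program sendrecv_exchange_sketch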

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_scalar_block.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,440 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!----
+!---- assemble the contributions between slices and chunks using MPI
+!----
+
+  subroutine assemble_MPI_scalar_block(myrank,array_val,nglob, &
+            iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi,iboolright_xi,iboolleft_eta,iboolright_eta, &
+            npoin2D_faces,npoin2D_xi,npoin2D_eta, &
+            iboolfaces,iboolcorner, &
+            iprocfrom_faces,iprocto_faces,imsg_type, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces_scalar,buffer_received_faces_scalar,npoin2D_max_all_CM_IC, &
+            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+            NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL, &
+            NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX, &
+            NGLOB2DMAX_XY,NCHUNKS)
+
+! this version of the routine is based on blocking MPI calls
+
+  implicit none
+
+! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+  include "precision.h"
+
+  integer myrank,nglob,NCHUNKS
+
+! array to assemble
+  real(kind=CUSTOM_REAL), dimension(nglob) :: array_val
+
+  integer iproc_xi,iproc_eta,ichunk
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi,npoin2D_eta
+  integer npoin2D_faces(NUMFACES_SHARED)
+
+  integer NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY
+  integer NPROC_XI,NPROC_ETA,NGLOB1D_RADIAL
+  integer NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS
+
+! for addressing of the slices
+  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1) :: addressing
+
+! 2-D addressing and buffers for summation between slices
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX) :: iboolleft_xi,iboolright_xi
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX) :: iboolleft_eta,iboolright_eta
+
+! indirect addressing for each corner of the chunks
+  integer, dimension(NGLOB1D_RADIAL,NUMCORNERS_SHARED) :: iboolcorner
+  integer icount_corners
+
+  integer :: npoin2D_max_all_CM_IC
+  integer, dimension(NGLOB2DMAX_XY,NUMFACES_SHARED) :: iboolfaces
+  real(kind=CUSTOM_REAL), dimension(npoin2D_max_all_CM_IC) :: buffer_send_faces_scalar,buffer_received_faces_scalar
+
+! buffers for send and receive between corners of the chunks
+  real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL) :: buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar
+
+! ---- arrays to assemble between chunks
+
+! communication pattern for faces between chunks
+  integer, dimension(NUMMSGS_FACES) :: iprocfrom_faces,iprocto_faces,imsg_type
+
+! communication pattern for corners between chunks
+  integer, dimension(NCORNERSCHUNKS) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+! MPI status of messages to be received
+  integer msg_status(MPI_STATUS_SIZE)
+
+  integer ipoin,ipoin2D,ipoin1D
+  integer sender,receiver,ier
+  integer imsg,imsg_loop
+  integer icount_faces,npoin2D_chunks
+
+! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+! check flag to see if we need to assemble (might be turned off when debugging)
+  if (.not. ACTUALLY_ASSEMBLE_MPI_SLICES) return
+
+! here we have to assemble all the contributions between slices using MPI
+
+!----
+!---- assemble the contributions between slices using MPI
+!----
+
+!----
+!---- first assemble along xi using the 2-D topology
+!----
+
+! assemble along xi only if more than one slice
+  if(NPROC_XI > 1) then
+
+! slices copy the right face into the buffer
+  do ipoin=1,npoin2D_xi(2)
+    buffer_send_faces_scalar(ipoin) = array_val(iboolright_xi(ipoin))
+  enddo
+
+! send messages forward along each row
+  if(iproc_xi == 0) then
+    sender = MPI_PROC_NULL
+  else
+    sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
+  endif
+  if(iproc_xi == NPROC_XI-1) then
+    receiver = MPI_PROC_NULL
+  else
+    receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
+  endif
+  call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,receiver, &
+        itag2,buffer_received_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,sender, &
+        itag,MPI_COMM_WORLD,msg_status,ier)
+
+! all slices add the buffer received to the contributions on the left face
+  if(iproc_xi > 0) then
+  do ipoin=1,npoin2D_xi(1)
+    array_val(iboolleft_xi(ipoin)) = array_val(iboolleft_xi(ipoin)) + &
+                              buffer_received_faces_scalar(ipoin)
+  enddo
+  endif
+
+! the contributions are correctly assembled on the left side of each slice
+! now we have to send the result back to the sender
+! all slices copy the left face into the buffer
+  do ipoin=1,npoin2D_xi(1)
+    buffer_send_faces_scalar(ipoin) = array_val(iboolleft_xi(ipoin))
+  enddo
+
+! send messages backward along each row
+  if(iproc_xi == NPROC_XI-1) then
+    sender = MPI_PROC_NULL
+  else
+    sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
+  endif
+  if(iproc_xi == 0) then
+    receiver = MPI_PROC_NULL
+  else
+    receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
+  endif
+  call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_xi(1),CUSTOM_MPI_TYPE,receiver, &
+        itag2,buffer_received_faces_scalar,npoin2D_xi(2),CUSTOM_MPI_TYPE,sender, &
+        itag,MPI_COMM_WORLD,msg_status,ier)
+
+! all slices copy the buffer received to the contributions on the right face
+  if(iproc_xi < NPROC_XI-1) then
+  do ipoin=1,npoin2D_xi(2)
+    array_val(iboolright_xi(ipoin)) = buffer_received_faces_scalar(ipoin)
+  enddo
+  endif
+
+  endif
+
+!----
+!---- then assemble along eta using the 2-D topology
+!----
+
+! assemble along eta only if more than one slice
+  if(NPROC_ETA > 1) then
+
+! slices copy the right face into the buffer
+  do ipoin=1,npoin2D_eta(2)
+    buffer_send_faces_scalar(ipoin) = array_val(iboolright_eta(ipoin))
+  enddo
+
+! send messages forward along each row
+  if(iproc_eta == 0) then
+    sender = MPI_PROC_NULL
+  else
+    sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
+  endif
+  if(iproc_eta == NPROC_ETA-1) then
+    receiver = MPI_PROC_NULL
+  else
+    receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
+  endif
+  call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,receiver, &
+    itag2,buffer_received_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,sender, &
+    itag,MPI_COMM_WORLD,msg_status,ier)
+
+! all slices add the buffer received to the contributions on the left face
+  if(iproc_eta > 0) then
+  do ipoin=1,npoin2D_eta(1)
+    array_val(iboolleft_eta(ipoin)) = array_val(iboolleft_eta(ipoin)) + &
+                              buffer_received_faces_scalar(ipoin)
+  enddo
+  endif
+
+! the contributions are correctly assembled on the left side of each slice
+! now we have to send the result back to the sender
+! all slices copy the left face into the buffer
+  do ipoin=1,npoin2D_eta(1)
+    buffer_send_faces_scalar(ipoin) = array_val(iboolleft_eta(ipoin))
+  enddo
+
+! send messages backward along each row
+  if(iproc_eta == NPROC_ETA-1) then
+    sender = MPI_PROC_NULL
+  else
+    sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
+  endif
+  if(iproc_eta == 0) then
+    receiver = MPI_PROC_NULL
+  else
+    receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
+  endif
+  call MPI_SENDRECV(buffer_send_faces_scalar,npoin2D_eta(1),CUSTOM_MPI_TYPE,receiver, &
+    itag2,buffer_received_faces_scalar,npoin2D_eta(2),CUSTOM_MPI_TYPE,sender, &
+    itag,MPI_COMM_WORLD,msg_status,ier)
+
+! all slices copy the buffer received to the contributions on the right face
+  if(iproc_eta < NPROC_ETA-1) then
+  do ipoin=1,npoin2D_eta(2)
+    array_val(iboolright_eta(ipoin)) = buffer_received_faces_scalar(ipoin)
+  enddo
+  endif
+
+  endif
+
+!----
+!---- start MPI assembling phase between chunks
+!----
+
+! check flag to see if we need to assemble (might be turned off when debugging)
+! and do not assemble if only one chunk
+  if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) return
+
+! ***************************************************************
+!  transmit messages in forward direction (iprocfrom -> iprocto)
+! ***************************************************************
+
+!---- put slices in receive mode
+!---- a given slice can belong to at most two faces
+
+! use three step scheme that can never deadlock
+! scheme for faces cannot deadlock even if NPROC_XI = NPROC_ETA = 1
+  do imsg_loop = 1,NUM_MSG_TYPES
+
+  icount_faces = 0
+  do imsg = 1,NUMMSGS_FACES
+  if(myrank==iprocfrom_faces(imsg) .or. &
+       myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+  if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+    sender = iprocfrom_faces(imsg)
+    npoin2D_chunks = npoin2D_faces(icount_faces)
+    call MPI_RECV(buffer_received_faces_scalar, &
+              npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
+              itag,MPI_COMM_WORLD,msg_status,ier)
+    do ipoin2D=1,npoin2D_chunks
+      array_val(iboolfaces(ipoin2D,icount_faces)) = &
+         array_val(iboolfaces(ipoin2D,icount_faces)) + buffer_received_faces_scalar(ipoin2D)
+    enddo
+  endif
+  enddo
+
+!---- put slices in send mode
+!---- a given slice can belong to at most two faces
+  icount_faces = 0
+  do imsg = 1,NUMMSGS_FACES
+  if(myrank==iprocfrom_faces(imsg) .or. &
+       myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+  if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+    receiver = iprocto_faces(imsg)
+    npoin2D_chunks = npoin2D_faces(icount_faces)
+    do ipoin2D=1,npoin2D_chunks
+      buffer_send_faces_scalar(ipoin2D) = array_val(iboolfaces(ipoin2D,icount_faces))
+    enddo
+    call MPI_SEND(buffer_send_faces_scalar,npoin2D_chunks, &
+              CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+  endif
+  enddo
+
+! *********************************************************************
+!  transmit messages back in opposite direction (iprocto -> iprocfrom)
+! *********************************************************************
+
+!---- put slices in receive mode
+!---- a given slice can belong to at most two faces
+
+  icount_faces = 0
+  do imsg = 1,NUMMSGS_FACES
+  if(myrank==iprocfrom_faces(imsg) .or. &
+       myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+  if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+    sender = iprocto_faces(imsg)
+    npoin2D_chunks = npoin2D_faces(icount_faces)
+    call MPI_RECV(buffer_received_faces_scalar, &
+              npoin2D_chunks,CUSTOM_MPI_TYPE,sender, &
+              itag,MPI_COMM_WORLD,msg_status,ier)
+    do ipoin2D=1,npoin2D_chunks
+      array_val(iboolfaces(ipoin2D,icount_faces)) = buffer_received_faces_scalar(ipoin2D)
+    enddo
+  endif
+  enddo
+
+!---- put slices in send mode
+!---- a given slice can belong to at most two faces
+  icount_faces = 0
+  do imsg = 1,NUMMSGS_FACES
+  if(myrank==iprocfrom_faces(imsg) .or. &
+       myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+  if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+    receiver = iprocfrom_faces(imsg)
+    npoin2D_chunks = npoin2D_faces(icount_faces)
+    do ipoin2D=1,npoin2D_chunks
+      buffer_send_faces_scalar(ipoin2D) = array_val(iboolfaces(ipoin2D,icount_faces))
+    enddo
+    call MPI_SEND(buffer_send_faces_scalar,npoin2D_chunks, &
+              CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+  endif
+  enddo
+
+! end of anti-deadlocking loop
+  enddo
+
+!----
+!---- start MPI assembling corners
+!----
+
+! scheme for corners cannot deadlock even if NPROC_XI = NPROC_ETA = 1
+
+! ***************************************************************
+!  transmit messages in forward direction (two workers -> master)
+! ***************************************************************
+
+  icount_corners = 0
+
+  do imsg = 1,NCORNERSCHUNKS
+
+  if(myrank == iproc_master_corners(imsg) .or. &
+     myrank == iproc_worker1_corners(imsg) .or. &
+     (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
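+! note: the second worker is involved only when NCHUNKS /= 2, since with two
+! chunks a corner is shared by two chunks only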
+
+!---- receive messages from the two workers on the master
+  if(myrank==iproc_master_corners(imsg)) then
+
+! receive from worker #1 and add to local array
+    sender = iproc_worker1_corners(imsg)
+    call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+          CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+    do ipoin1D=1,NGLOB1D_RADIAL
+      array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_scalar(ipoin1D)
+    enddo
+
+! receive from worker #2 and add to local array
+  if(NCHUNKS /= 2) then
+    sender = iproc_worker2_corners(imsg)
+    call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+          CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+    do ipoin1D=1,NGLOB1D_RADIAL
+      array_val(iboolcorner(ipoin1D,icount_corners)) = array_val(iboolcorner(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_scalar(ipoin1D)
+    enddo
+  endif
+
+  endif
+
+!---- send messages from the two workers to the master
+  if(myrank==iproc_worker1_corners(imsg) .or. &
+              (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+
+    receiver = iproc_master_corners(imsg)
+    do ipoin1D=1,NGLOB1D_RADIAL
+      buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
+    enddo
+    call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+              receiver,itag,MPI_COMM_WORLD,ier)
+
+  endif
+
+! *********************************************************************
+!  transmit messages back in opposite direction (master -> two workers)
+! *********************************************************************
+
+!---- receive messages from the master on the two workers
+  if(myrank==iproc_worker1_corners(imsg) .or. &
+              (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+
+! receive from master and copy to local array
+    sender = iproc_master_corners(imsg)
+    call MPI_RECV(buffer_recv_chunkcorn_scalar,NGLOB1D_RADIAL, &
+          CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+    do ipoin1D=1,NGLOB1D_RADIAL
+      array_val(iboolcorner(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_scalar(ipoin1D)
+    enddo
+
+  endif
+
+!---- send messages from the master to the two workers
+  if(myrank==iproc_master_corners(imsg)) then
+
+    do ipoin1D=1,NGLOB1D_RADIAL
+      buffer_send_chunkcorn_scalar(ipoin1D) = array_val(iboolcorner(ipoin1D,icount_corners))
+    enddo
+
+! send to worker #1
+    receiver = iproc_worker1_corners(imsg)
+    call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+              receiver,itag,MPI_COMM_WORLD,ier)
+
+! send to worker #2
+  if(NCHUNKS /= 2) then
+    receiver = iproc_worker2_corners(imsg)
+    call MPI_SEND(buffer_send_chunkcorn_scalar,NGLOB1D_RADIAL,CUSTOM_MPI_TYPE, &
+              receiver,itag,MPI_COMM_WORLD,ier)
+  endif
+
+  endif
+
+  enddo
+
+  end subroutine assemble_MPI_scalar_block
+

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector_block.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector_block.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/assemble_MPI_vector_block.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,706 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!----
+!---- assemble the contributions between slices and chunks using MPI
+!---- we handle two regions (crust/mantle and inner core) in the same MPI call
+!---- to reduce the total number of MPI calls
+!----
+
+  subroutine assemble_MPI_vector_block(myrank, &
+            accel_crust_mantle,accel_inner_core, &
+            iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+            npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+            iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+            iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+            npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+            iboolfaces_inner_core,iboolcorner_inner_core, &
+            iprocfrom_faces,iprocto_faces,imsg_type, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces_vector,buffer_received_faces_vector, &
+            buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
+            NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS, &
+            NPROC_XI,NPROC_ETA, &
+            NGLOB1D_RADIAL_crust_mantle,NGLOB1D_RADIAL_inner_core, &
+            NGLOB2DMAX_XY,NCHUNKS)
+
+! this version of the routine is based on blocking MPI calls
+
+  implicit none
+
+! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+  include "precision.h"
+
+! include values created by the mesher
+  include "OUTPUT_FILES/values_from_mesher.h"
+
+  integer myrank,NCHUNKS
+
+! the two arrays to assemble
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: accel_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: accel_inner_core
+
+  integer iproc_xi,iproc_eta,ichunk
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle
+  integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_inner_core,npoin2D_eta_inner_core
+  integer npoin2D_faces_inner_core(NUMFACES_SHARED)
+
+  integer NGLOB1D_RADIAL_crust_mantle,NGLOB1D_RADIAL_inner_core
+  integer NPROC_XI,NPROC_ETA,NGLOB2DMAX_XY
+  integer NUMMSGS_FACES,NUM_MSG_TYPES,NCORNERSCHUNKS
+
+! for addressing of the slices
+  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1) :: addressing
+
+! 2-D addressing and buffers for summation between slices
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+
+! indirect addressing for each corner of the chunks
+  integer, dimension(NGLOB1D_RADIAL_crust_mantle,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
+  integer, dimension(NGLOB1D_RADIAL_inner_core,NUMCORNERS_SHARED) :: iboolcorner_inner_core
+  integer icount_corners
+
+  integer, dimension(NGLOB2DMAX_XY_CM_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
+  integer, dimension(NGLOB2DMAX_XY_IC_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
+! size of buffers is multiplied by 2 because we handle two regions in the same MPI call
+  real(kind=CUSTOM_REAL), dimension(NDIM,2*NGLOB2DMAX_XY) :: buffer_send_faces_vector,buffer_received_faces_vector
+
+! buffers for send and receive between corners of the chunks
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_crust_mantle + NGLOB1D_RADIAL_inner_core) :: &
+    buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector
+
+! ---- arrays to assemble between chunks
+
+! communication pattern for faces between chunks
+  integer, dimension(NUMMSGS_FACES) :: iprocfrom_faces,iprocto_faces,imsg_type
+
+! communication pattern for corners between chunks
+  integer, dimension(NCORNERSCHUNKS) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+! MPI status of messages to be received
+  integer msg_status(MPI_STATUS_SIZE)
+
+  integer ipoin,ipoin2D,ipoin1D
+  integer sender,receiver,ier
+  integer imsg,imsg_loop
+  integer icount_faces,npoin2D_chunks_all
+
+  integer :: NGLOB1D_RADIAL_all,ioffset
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_all,npoin2D_eta_all
+! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+! check flag to see if we need to assemble (might be turned off when debugging)
+  if (.not. ACTUALLY_ASSEMBLE_MPI_SLICES) return
+
+! here we have to assemble all the contributions between slices using MPI
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+  npoin2D_xi_all(:) = npoin2D_xi_crust_mantle(:) + npoin2D_xi_inner_core(:)
+  npoin2D_eta_all(:) = npoin2D_eta_crust_mantle(:) + npoin2D_eta_inner_core(:)
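+! resulting layout of each face buffer: crust/mantle points come first
+! (entries 1 .. npoin2D_*_crust_mantle) and inner core points follow at the
+! offset ioffset = npoin2D_*_crust_mantle, as set before each copy loop below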
+
+!----
+!---- assemble the contributions between slices using MPI
+!----
+
+!----
+!---- first assemble along xi using the 2-D topology
+!----
+
+! assemble along xi only if more than one slice
+  if(NPROC_XI > 1) then
+
+! slices copy the right face into the buffer
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+  ioffset = npoin2D_xi_crust_mantle(2)
+
+  do ipoin = 1,npoin2D_xi_crust_mantle(2)
+    buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolright_xi_crust_mantle(ipoin))
+    buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin))
+    buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin))
+  enddo
+
+  do ipoin = 1,npoin2D_xi_inner_core(2)
+    buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolright_xi_inner_core(ipoin))
+    buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolright_xi_inner_core(ipoin))
+    buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolright_xi_inner_core(ipoin))
+  enddo
+
+! send messages forward along each row
+  if(iproc_xi == 0) then
+    sender = MPI_PROC_NULL
+  else
+    sender = addressing(ichunk,iproc_xi - 1,iproc_eta)
+  endif
+  if(iproc_xi == NPROC_XI-1) then
+    receiver = MPI_PROC_NULL
+  else
+    receiver = addressing(ichunk,iproc_xi + 1,iproc_eta)
+  endif
+
+  call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,receiver, &
+        itag2,buffer_received_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,sender, &
+        itag,MPI_COMM_WORLD,msg_status,ier)
+
+! all slices add the buffer received to the contributions on the left face
+  if(iproc_xi > 0) then
+
+  do ipoin = 1,npoin2D_xi_crust_mantle(1)
+    accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin)) + &
+                              buffer_received_faces_vector(1,ipoin)
+    accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin)) + &
+                              buffer_received_faces_vector(2,ipoin)
+    accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin)) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin)) + &
+                              buffer_received_faces_vector(3,ipoin)
+  enddo
+
+  ioffset = npoin2D_xi_crust_mantle(1)
+  do ipoin = 1,npoin2D_xi_inner_core(1)
+    accel_inner_core(1,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(1,iboolleft_xi_inner_core(ipoin)) + &
+                              buffer_received_faces_vector(1,ioffset + ipoin)
+    accel_inner_core(2,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin)) + &
+                              buffer_received_faces_vector(2,ioffset + ipoin)
+    accel_inner_core(3,iboolleft_xi_inner_core(ipoin)) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin)) + &
+                              buffer_received_faces_vector(3,ioffset + ipoin)
+  enddo
+
+  endif
+
+! the contributions are correctly assembled on the left side of each slice
+! now we have to send the result back to the sender
+! all slices copy the left face into the buffer
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+  ioffset = npoin2D_xi_crust_mantle(1)
+
+  do ipoin = 1,npoin2D_xi_crust_mantle(1)
+    buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolleft_xi_crust_mantle(ipoin))
+    buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolleft_xi_crust_mantle(ipoin))
+    buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolleft_xi_crust_mantle(ipoin))
+  enddo
+
+  do ipoin = 1,npoin2D_xi_inner_core(1)
+    buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolleft_xi_inner_core(ipoin))
+    buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolleft_xi_inner_core(ipoin))
+    buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolleft_xi_inner_core(ipoin))
+  enddo
+
+! send messages backward along each row
+  if(iproc_xi == NPROC_XI-1) then
+    sender = MPI_PROC_NULL
+  else
+    sender = addressing(ichunk,iproc_xi + 1,iproc_eta)
+  endif
+  if(iproc_xi == 0) then
+    receiver = MPI_PROC_NULL
+  else
+    receiver = addressing(ichunk,iproc_xi - 1,iproc_eta)
+  endif
+  call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_xi_all(1),CUSTOM_MPI_TYPE,receiver, &
+        itag2,buffer_received_faces_vector,NDIM*npoin2D_xi_all(2),CUSTOM_MPI_TYPE,sender, &
+        itag,MPI_COMM_WORLD,msg_status,ier)
+
+! all slices copy the buffer received to the contributions on the right face
+  if(iproc_xi < NPROC_XI-1) then
+
+  do ipoin = 1,npoin2D_xi_crust_mantle(2)
+    accel_crust_mantle(1,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin)
+    accel_crust_mantle(2,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin)
+    accel_crust_mantle(3,iboolright_xi_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin)
+  enddo
+
+  ioffset = npoin2D_xi_crust_mantle(2)
+  do ipoin = 1,npoin2D_xi_inner_core(2)
+    accel_inner_core(1,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin)
+    accel_inner_core(2,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin)
+    accel_inner_core(3,iboolright_xi_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin)
+  enddo
+
+  endif
+
+  endif
+
+!----
+!---- then assemble along eta using the 2-D topology
+!----
+
+! assemble along eta only if more than one slice
+  if(NPROC_ETA > 1) then
+
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+  ioffset = npoin2D_eta_crust_mantle(2)
+
+! slices copy the right face into the buffer
+  do ipoin = 1,npoin2D_eta_crust_mantle(2)
+    buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolright_eta_crust_mantle(ipoin))
+    buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin))
+    buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin))
+  enddo
+
+  do ipoin = 1,npoin2D_eta_inner_core(2)
+    buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolright_eta_inner_core(ipoin))
+    buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolright_eta_inner_core(ipoin))
+    buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolright_eta_inner_core(ipoin))
+  enddo
+
+! send messages forward along each row
+  if(iproc_eta == 0) then
+    sender = MPI_PROC_NULL
+  else
+    sender = addressing(ichunk,iproc_xi,iproc_eta - 1)
+  endif
+  if(iproc_eta == NPROC_ETA-1) then
+    receiver = MPI_PROC_NULL
+  else
+    receiver = addressing(ichunk,iproc_xi,iproc_eta + 1)
+  endif
+  call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,receiver, &
+    itag2,buffer_received_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,sender, &
+    itag,MPI_COMM_WORLD,msg_status,ier)
+
+! all slices add the buffer received to the contributions on the left face
+  if(iproc_eta > 0) then
+
+  do ipoin = 1,npoin2D_eta_crust_mantle(1)
+    accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin)) + &
+                              buffer_received_faces_vector(1,ipoin)
+    accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin)) + &
+                              buffer_received_faces_vector(2,ipoin)
+    accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin)) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin)) + &
+                              buffer_received_faces_vector(3,ipoin)
+  enddo
+
+  ioffset = npoin2D_eta_crust_mantle(1)
+  do ipoin = 1,npoin2D_eta_inner_core(1)
+    accel_inner_core(1,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(1,iboolleft_eta_inner_core(ipoin)) + &
+                              buffer_received_faces_vector(1,ioffset + ipoin)
+    accel_inner_core(2,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin)) + &
+                              buffer_received_faces_vector(2,ioffset + ipoin)
+    accel_inner_core(3,iboolleft_eta_inner_core(ipoin)) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin)) + &
+                              buffer_received_faces_vector(3,ioffset + ipoin)
+  enddo
+
+  endif
+
+! the contributions are correctly assembled on the left side of each slice
+! now we have to send the result back to the sender
+! all slices copy the left face into the buffer
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+  ioffset = npoin2D_eta_crust_mantle(1)
+
+  do ipoin = 1,npoin2D_eta_crust_mantle(1)
+    buffer_send_faces_vector(1,ipoin) = accel_crust_mantle(1,iboolleft_eta_crust_mantle(ipoin))
+    buffer_send_faces_vector(2,ipoin) = accel_crust_mantle(2,iboolleft_eta_crust_mantle(ipoin))
+    buffer_send_faces_vector(3,ipoin) = accel_crust_mantle(3,iboolleft_eta_crust_mantle(ipoin))
+  enddo
+
+  do ipoin = 1,npoin2D_eta_inner_core(1)
+    buffer_send_faces_vector(1,ioffset + ipoin) = accel_inner_core(1,iboolleft_eta_inner_core(ipoin))
+    buffer_send_faces_vector(2,ioffset + ipoin) = accel_inner_core(2,iboolleft_eta_inner_core(ipoin))
+    buffer_send_faces_vector(3,ioffset + ipoin) = accel_inner_core(3,iboolleft_eta_inner_core(ipoin))
+  enddo
+
+! send messages backward along each row
+  if(iproc_eta == NPROC_ETA-1) then
+    sender = MPI_PROC_NULL
+  else
+    sender = addressing(ichunk,iproc_xi,iproc_eta + 1)
+  endif
+  if(iproc_eta == 0) then
+    receiver = MPI_PROC_NULL
+  else
+    receiver = addressing(ichunk,iproc_xi,iproc_eta - 1)
+  endif
+  call MPI_SENDRECV(buffer_send_faces_vector,NDIM*npoin2D_eta_all(1),CUSTOM_MPI_TYPE,receiver, &
+    itag2,buffer_received_faces_vector,NDIM*npoin2D_eta_all(2),CUSTOM_MPI_TYPE,sender, &
+    itag,MPI_COMM_WORLD,msg_status,ier)
+
+! all slices copy the buffer received to the contributions on the right face
+  if(iproc_eta < NPROC_ETA-1) then
+
+  do ipoin = 1,npoin2D_eta_crust_mantle(2)
+    accel_crust_mantle(1,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(1,ipoin)
+    accel_crust_mantle(2,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(2,ipoin)
+    accel_crust_mantle(3,iboolright_eta_crust_mantle(ipoin)) = buffer_received_faces_vector(3,ipoin)
+  enddo
+
+  ioffset = npoin2D_eta_crust_mantle(2)
+  do ipoin = 1,npoin2D_eta_inner_core(2)
+    accel_inner_core(1,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(1,ioffset + ipoin)
+    accel_inner_core(2,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(2,ioffset + ipoin)
+    accel_inner_core(3,iboolright_eta_inner_core(ipoin)) = buffer_received_faces_vector(3,ioffset + ipoin)
+  enddo
+
+  endif
+
+  endif
+
+!----
+!---- start MPI assembling phase between chunks
+!----
+
+! check flag to see if we need to assemble (might be turned off when debugging)
+! and do not assemble if only one chunk
+  if (.not. ACTUALLY_ASSEMBLE_MPI_CHUNKS .or. NCHUNKS == 1) return
+
+! ***************************************************************
+!  transmit messages in forward direction (iprocfrom -> iprocto)
+! ***************************************************************
+
+!---- put slices in receive mode
+!---- a given slice can belong to at most two faces
+
+! use a three-step scheme that can never deadlock
+! scheme for faces cannot deadlock even if NPROC_XI = NPROC_ETA = 1
+  do imsg_loop = 1,NUM_MSG_TYPES
+
+  icount_faces = 0
+  do imsg = 1,NUMMSGS_FACES
+  if(myrank==iprocfrom_faces(imsg) .or. &
+       myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+  if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+    sender = iprocfrom_faces(imsg)
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+    npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
+
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+    ioffset = npoin2D_faces_crust_mantle(icount_faces)
+
+    call MPI_RECV(buffer_received_faces_vector,NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
+              itag,MPI_COMM_WORLD,msg_status,ier)
+
+    do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+      accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+         accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(1,ipoin2D)
+      accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+         accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(2,ipoin2D)
+      accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = &
+         accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) + buffer_received_faces_vector(3,ipoin2D)
+    enddo
+
+    do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+      accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+         accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) + buffer_received_faces_vector(1,ioffset + ipoin2D)
+      accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+         accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) + buffer_received_faces_vector(2,ioffset + ipoin2D)
+      accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = &
+         accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) + buffer_received_faces_vector(3,ioffset + ipoin2D)
+    enddo
+
+  endif
+  enddo
+
+!---- put slices in send mode
+!---- a given slice can belong to at most two faces
+  icount_faces = 0
+  do imsg = 1,NUMMSGS_FACES
+  if(myrank==iprocfrom_faces(imsg) .or. &
+       myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+  if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+    receiver = iprocto_faces(imsg)
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+    npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
+
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+    ioffset = npoin2D_faces_crust_mantle(icount_faces)
+
+    do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+      buffer_send_faces_vector(1,ipoin2D) = accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+      buffer_send_faces_vector(2,ipoin2D) = accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+      buffer_send_faces_vector(3,ipoin2D) = accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+    enddo
+
+    do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+      buffer_send_faces_vector(1,ioffset + ipoin2D) = accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces))
+      buffer_send_faces_vector(2,ioffset + ipoin2D) = accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces))
+      buffer_send_faces_vector(3,ioffset + ipoin2D) = accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces))
+    enddo
+
+    call MPI_SEND(buffer_send_faces_vector,NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+
+  endif
+  enddo
+
+
+! *********************************************************************
+!  transmit messages back in opposite direction (iprocto -> iprocfrom)
+! *********************************************************************
+
+!---- put slices in receive mode
+!---- a given slice can belong to at most two faces
+
+  icount_faces = 0
+  do imsg = 1,NUMMSGS_FACES
+  if(myrank==iprocfrom_faces(imsg) .or. &
+       myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+  if(myrank==iprocfrom_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+    sender = iprocto_faces(imsg)
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+    npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
+
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+    ioffset = npoin2D_faces_crust_mantle(icount_faces)
+
+    call MPI_RECV(buffer_received_faces_vector,NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,sender, &
+              itag,MPI_COMM_WORLD,msg_status,ier)
+
+    do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+      accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(1,ipoin2D)
+      accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(2,ipoin2D)
+      accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces)) = buffer_received_faces_vector(3,ipoin2D)
+    enddo
+
+    do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+      accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces)) = buffer_received_faces_vector(1,ioffset + ipoin2D)
+      accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces)) = buffer_received_faces_vector(2,ioffset + ipoin2D)
+      accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces)) = buffer_received_faces_vector(3,ioffset + ipoin2D)
+    enddo
+
+  endif
+  enddo
+
+!---- put slices in send mode
+!---- a given slice can belong to at most two faces
+  icount_faces = 0
+  do imsg = 1,NUMMSGS_FACES
+  if(myrank==iprocfrom_faces(imsg) .or. &
+       myrank==iprocto_faces(imsg)) icount_faces = icount_faces + 1
+  if(myrank==iprocto_faces(imsg) .and. imsg_type(imsg) == imsg_loop) then
+    receiver = iprocfrom_faces(imsg)
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+    npoin2D_chunks_all = npoin2D_faces_crust_mantle(icount_faces) + npoin2D_faces_inner_core(icount_faces)
+
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+    ioffset = npoin2D_faces_crust_mantle(icount_faces)
+
+    do ipoin2D = 1,npoin2D_faces_crust_mantle(icount_faces)
+      buffer_send_faces_vector(1,ipoin2D) = accel_crust_mantle(1,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+      buffer_send_faces_vector(2,ipoin2D) = accel_crust_mantle(2,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+      buffer_send_faces_vector(3,ipoin2D) = accel_crust_mantle(3,iboolfaces_crust_mantle(ipoin2D,icount_faces))
+    enddo
+
+    do ipoin2D = 1,npoin2D_faces_inner_core(icount_faces)
+      buffer_send_faces_vector(1,ioffset + ipoin2D) = accel_inner_core(1,iboolfaces_inner_core(ipoin2D,icount_faces))
+      buffer_send_faces_vector(2,ioffset + ipoin2D) = accel_inner_core(2,iboolfaces_inner_core(ipoin2D,icount_faces))
+      buffer_send_faces_vector(3,ioffset + ipoin2D) = accel_inner_core(3,iboolfaces_inner_core(ipoin2D,icount_faces))
+    enddo
+
+    call MPI_SEND(buffer_send_faces_vector,NDIM*npoin2D_chunks_all,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+
+  endif
+  enddo
+
+! end of anti-deadlocking loop
+  enddo
+
+
+!----
+!---- start MPI assembling corners
+!----
+
+! scheme for corners cannot deadlock even if NPROC_XI = NPROC_ETA = 1
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+  NGLOB1D_RADIAL_all = NGLOB1D_RADIAL_crust_mantle + NGLOB1D_RADIAL_inner_core
+
+! the buffer for the inner core starts right after the buffer for the crust and mantle
+  ioffset = NGLOB1D_RADIAL_crust_mantle
+
+! ***************************************************************
+!  transmit messages in forward direction (two workers -> master)
+! ***************************************************************
+
+  icount_corners = 0
+
+  do imsg = 1,NCORNERSCHUNKS
+
+  if(myrank == iproc_master_corners(imsg) .or. &
+     myrank == iproc_worker1_corners(imsg) .or. &
+     (NCHUNKS /= 2 .and. myrank == iproc_worker2_corners(imsg))) icount_corners = icount_corners + 1
+
+!---- receive messages from the two workers on the master
+  if(myrank==iproc_master_corners(imsg)) then
+
+! receive from worker #1 and add to local array
+    sender = iproc_worker1_corners(imsg)
+
+    call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
+          CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+
+    do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
+      accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
+               accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(1,ipoin1D)
+      accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
+               accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(2,ipoin1D)
+      accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
+               accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(3,ipoin1D)
+    enddo
+
+    do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
+      accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
+               accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(1,ioffset + ipoin1D)
+      accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
+               accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(2,ioffset + ipoin1D)
+      accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
+               accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(3,ioffset + ipoin1D)
+    enddo
+
+! receive from worker #2 and add to local array
+  if(NCHUNKS /= 2) then
+
+    sender = iproc_worker2_corners(imsg)
+
+    call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
+          CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+
+    do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
+      accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
+               accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(1,ipoin1D)
+      accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
+               accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(2,ipoin1D)
+      accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = &
+               accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(3,ipoin1D)
+    enddo
+
+    do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
+      accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
+               accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(1,ioffset + ipoin1D)
+      accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
+               accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(2,ioffset + ipoin1D)
+      accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners)) = &
+               accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners)) + &
+               buffer_recv_chunkcorn_vector(3,ioffset + ipoin1D)
+    enddo
+
+  endif
+
+  endif
+
+!---- send messages from the two workers to the master
+  if(myrank==iproc_worker1_corners(imsg) .or. &
+              (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+
+    receiver = iproc_master_corners(imsg)
+
+    do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
+      buffer_send_chunkcorn_vector(1,ipoin1D) = accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners))
+      buffer_send_chunkcorn_vector(2,ipoin1D) = accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners))
+      buffer_send_chunkcorn_vector(3,ipoin1D) = accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners))
+    enddo
+
+    do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
+      buffer_send_chunkcorn_vector(1,ioffset + ipoin1D) = accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners))
+      buffer_send_chunkcorn_vector(2,ioffset + ipoin1D) = accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners))
+      buffer_send_chunkcorn_vector(3,ioffset + ipoin1D) = accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners))
+    enddo
+
+    call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+
+  endif
+
+! *********************************************************************
+!  transmit messages back in opposite direction (master -> two workers)
+! *********************************************************************
+
+!---- receive messages from the master on the two workers
+  if(myrank==iproc_worker1_corners(imsg) .or. &
+              (NCHUNKS /= 2 .and. myrank==iproc_worker2_corners(imsg))) then
+
+! receive from master and copy to local array
+    sender = iproc_master_corners(imsg)
+
+    call MPI_RECV(buffer_recv_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all, &
+          CUSTOM_MPI_TYPE,sender,itag,MPI_COMM_WORLD,msg_status,ier)
+
+    do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
+      accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_vector(1,ipoin1D)
+      accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_vector(2,ipoin1D)
+      accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_vector(3,ipoin1D)
+    enddo
+
+    do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
+      accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_vector(1,ioffset + ipoin1D)
+      accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_vector(2,ioffset + ipoin1D)
+      accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners)) = buffer_recv_chunkcorn_vector(3,ioffset + ipoin1D)
+    enddo
+
+  endif
+
+!---- send messages from the master to the two workers
+  if(myrank==iproc_master_corners(imsg)) then
+
+    do ipoin1D = 1,NGLOB1D_RADIAL_crust_mantle
+      buffer_send_chunkcorn_vector(1,ipoin1D) = accel_crust_mantle(1,iboolcorner_crust_mantle(ipoin1D,icount_corners))
+      buffer_send_chunkcorn_vector(2,ipoin1D) = accel_crust_mantle(2,iboolcorner_crust_mantle(ipoin1D,icount_corners))
+      buffer_send_chunkcorn_vector(3,ipoin1D) = accel_crust_mantle(3,iboolcorner_crust_mantle(ipoin1D,icount_corners))
+    enddo
+
+    do ipoin1D = 1,NGLOB1D_RADIAL_inner_core
+      buffer_send_chunkcorn_vector(1,ioffset + ipoin1D) = accel_inner_core(1,iboolcorner_inner_core(ipoin1D,icount_corners))
+      buffer_send_chunkcorn_vector(2,ioffset + ipoin1D) = accel_inner_core(2,iboolcorner_inner_core(ipoin1D,icount_corners))
+      buffer_send_chunkcorn_vector(3,ioffset + ipoin1D) = accel_inner_core(3,iboolcorner_inner_core(ipoin1D,icount_corners))
+    enddo
+
+! send to worker #1
+    receiver = iproc_worker1_corners(imsg)
+    call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+
+! send to worker #2
+  if(NCHUNKS /= 2) then
+    receiver = iproc_worker2_corners(imsg)
+    call MPI_SEND(buffer_send_chunkcorn_vector,NDIM*NGLOB1D_RADIAL_all,CUSTOM_MPI_TYPE,receiver,itag,MPI_COMM_WORLD,ier)
+
+  endif
+
+  endif
+
+  enddo
+
+  end subroutine assemble_MPI_vector_block
+

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_noDev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_noDev.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_crust_mantle_noDev.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,973 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine compute_forces_crust_mantle(minus_gravity_table,density_table,minus_deriv_gravity_table, &
+          displ_crust_mantle,accel_crust_mantle,xstore,ystore,zstore, &
+          xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+            is_on_a_slice_edge_crust_mantle,icall, &
+            accel_inner_core,ibool_inner_core,idoubling_inner_core, &
+            myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+            npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+            iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+            iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+            npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+            iboolfaces_inner_core,iboolcorner_inner_core, &
+            iprocfrom_faces,iprocto_faces, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+            buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
+            nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+            npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+            receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+          hprime_xx,hprime_yy,hprime_zz, &
+          hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+          wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+          kappavstore,kappahstore,muvstore,muhstore,eta_anisostore, &
+          c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
+          c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
+          c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
+          ibool,ispec_is_tiso, &
+          R_memory,one_minus_sum_beta,deltat,veloc_crust_mantle, &
+          alphaval,betaval,gammaval,factor_common,vx,vy,vz,vnspec,PARTIAL_PHYS_DISPERSION_ONLY,&
+          istage,R_memory_lddrk,tau_sigma_CUSTOM_REAL)
+
+  implicit none
+
+  include "constants.h"
+
+! include values created by the mesher
+! done for performance reasons only, using static allocation to allow for loop unrolling
+  include "OUTPUT_FILES/values_from_mesher.h"
+
+! model_attenuation_variables
+!  type model_attenuation_variables
+!    sequence
+!    double precision min_period, max_period
+!    double precision                          :: QT_c_source        ! Source Frequency
+!    double precision, dimension(:), pointer   :: Qtau_s             ! tau_sigma
+!    double precision, dimension(:), pointer   :: QrDisc             ! Discontinuities Defined
+!    double precision, dimension(:), pointer   :: Qr                 ! Radius
+!    double precision, dimension(:), pointer   :: Qmu                ! Shear Attenuation
+!    double precision, dimension(:,:), pointer :: Qtau_e             ! tau_epsilon
+!    double precision, dimension(:), pointer   :: Qomsb, Qomsb2      ! one_minus_sum_beta
+!    double precision, dimension(:,:), pointer :: Qfc, Qfc2          ! factor_common
+!    double precision, dimension(:), pointer   :: Qsf, Qsf2          ! scale_factor
+!    integer, dimension(:), pointer            :: Qrmin              ! Max and Mins of idoubling
+!    integer, dimension(:), pointer            :: Qrmax              ! Max and Mins of idoubling
+!    integer, dimension(:), pointer            :: interval_Q                 ! Steps
+!    integer                                   :: Qn                 ! Number of points
+!    integer dummy_pad ! padding 4 bytes to align the structure
+!  end type model_attenuation_variables
+
+! array with the local to global mapping per slice
+!  integer, dimension(NSPEC_CRUST_MANTLE) :: idoubling
+  logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso
+
+! displacement and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: displ_crust_mantle,accel_crust_mantle,veloc_crust_mantle
+
+! memory variables for attenuation
+! memory variables R_ij are stored at the local rather than global level
+! to allow the compiler to optimize cache access
+  integer i_SLS,i_memory
+! variable sized array variables for one_minus_sum_beta and factor_common
+  integer vx, vy, vz, vnspec
+
+  real(kind=CUSTOM_REAL) one_minus_sum_beta_use,minus_sum_beta,deltat
+  real(kind=CUSTOM_REAL), dimension(vx, vy, vz, vnspec) :: one_minus_sum_beta
+
+! for attenuation
+  real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
+  real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory
+  logical :: PARTIAL_PHYS_DISPERSION_ONLY
+
+! [alpha,beta,gamma]val reduced to N_SLS and factor_common to N_SLS*NUM_NODES
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
+  real(kind=CUSTOM_REAL), dimension(N_SLS, vx, vy, vz, vnspec) :: factor_common
+  real(kind=CUSTOM_REAL), dimension(NGLLX, NGLLY, NGLLZ) :: factor_common_c44_muv
+
+  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc_nplus1
+
+! arrays with mesh parameters per slice
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
+        xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+
+! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+
+! x, y and z contain r, theta and phi
+  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: xstore,ystore,zstore
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
+        kappavstore,muvstore
+
+! store anisotropic properties only where needed to save memory
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
+        kappahstore,muhstore,eta_anisostore
+
+! arrays for full anisotropy only when needed
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: &
+        c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
+        c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
+        c36store,c44store,c45store,c46store,c55store,c56store,c66store
+
+  integer ispec,iglob,ispec_strain
+  integer i,j,k,l
+  real(kind=CUSTOM_REAL) templ
+
+! the 21 coefficients for an anisotropic medium in reduced notation
+  real(kind=CUSTOM_REAL) c11,c22,c33,c44,c55,c66,c12,c13,c23,c14,c24,c34,c15,c25,c35,c45,c16,c26,c36,c46,c56
+
+  real(kind=CUSTOM_REAL) rhovphsq,sinphifour,cosphisq,sinphisq,costhetasq,rhovsvsq,sinthetasq, &
+        cosphifour,costhetafour,rhovpvsq,sinthetafour,rhovshsq,cosfourphi, &
+        costwotheta,cosfourtheta,sintwophisq,costheta,sinphi,sintheta,cosphi, &
+        sintwotheta,costwophi,sintwophi,costwothetasq,costwophisq,phi,theta
+
+  real(kind=CUSTOM_REAL) two_rhovpvsq,two_rhovphsq,two_rhovsvsq,two_rhovshsq
+  real(kind=CUSTOM_REAL) four_rhovpvsq,four_rhovphsq,four_rhovsvsq,four_rhovshsq
+
+  real(kind=CUSTOM_REAL) twoetaminone,etaminone,eta_aniso
+  real(kind=CUSTOM_REAL) two_eta_aniso,four_eta_aniso,six_eta_aniso
+
+  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
+
+  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+
+  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
+
+  real(kind=CUSTOM_REAL) hp1,hp2,hp3
+  real(kind=CUSTOM_REAL) fac1,fac2,fac3
+  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
+  real(kind=CUSTOM_REAL) kappal,kappavl,kappahl,muvl,muhl
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
+
+  real(kind=CUSTOM_REAL) tempx1l,tempx2l,tempx3l
+  real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
+  real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
+
+! for gravity
+  integer int_radius
+  real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
+  double precision radius,rho,minus_g,minus_dg
+  double precision minus_g_over_radius,minus_dg_plus_g_over_radius
+  double precision cos_theta,sin_theta,cos_phi,sin_phi
+  double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
+  double precision factor,sx_l,sy_l,sz_l,gxl,gyl,gzl
+  double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
+  double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table,density_table,minus_deriv_gravity_table
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
+  double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
+
+! this is for non-blocking MPI
+  integer :: iphase,icall
+
+  integer :: computed_elements
+
+  logical, dimension(NSPEC_CRUST_MANTLE) :: is_on_a_slice_edge_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: accel_inner_core
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
+
+  integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
+
+  integer :: ichunk,iproc_xi,iproc_eta,myrank
+
+  integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
+
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+
+  integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
+  integer npoin2D_faces_inner_core(NUMFACES_SHARED)
+
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+       npoin2D_xi_inner_core,npoin2D_eta_inner_core
+
+! communication pattern for faces between chunks
+  integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
+
+! communication pattern for corners between chunks
+  integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+  integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
+  integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
+
+  integer, dimension(NGLOB2DMAX_XY_CM_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
+  integer, dimension(NGLOB2DMAX_XY_IC_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
+
+  integer :: npoin2D_max_all_CM_IC
+  real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC) :: buffer_send_faces,buffer_received_faces
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
+     buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector
+
+! for matching with central cube in inner core
+  integer nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
+  integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
+  double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices
+  double precision, dimension(npoin2D_cube_from_slices,NDIM,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
+  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
+  integer receiver_cube_from_slices
+  logical :: INCLUDE_CENTRAL_CUBE
+
+! elements on the bottom boundary of the inner core (for matching with the central cube)
+  integer NSPEC2D_BOTTOM_INNER_CORE
+  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+
+! for LDDRK
+  integer :: istage
+  real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory_lddrk
+  real(kind=CUSTOM_REAL),dimension(N_SLS) :: tau_sigma_CUSTOM_REAL
+
+! ****************************************************
+!   big loop over all spectral elements in the solid
+! ****************************************************
+
+  computed_elements = 0
+
+  do ispec = 1,NSPEC_CRUST_MANTLE
+
+! hide communications by computing the edges first
+    if((icall == 2 .and. is_on_a_slice_edge_crust_mantle(ispec)) .or. &
+       (icall == 1 .and. .not. is_on_a_slice_edge_crust_mantle(ispec))) cycle
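+! thus icall == 1 computes only the elements located on slice edges (whose
+! contributions feed the MPI assembly), and icall == 2 computes the remaining
+! interior elements so that computation overlaps the communications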
+
+! process the non-blocking communications every ELEMENTS_NONBLOCKING_CM_IC elements
+    computed_elements = computed_elements + 1
+    if (icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_CM_IC) == 0) then
+
+      if(iphase <= 7) call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
+            iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+            npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+            iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+            iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+            npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+            iboolfaces_inner_core,iboolcorner_inner_core, &
+            iprocfrom_faces,iprocto_faces, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+            buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
+            NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
+            NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_CM, &
+            NGLOB1D_RADIAL_IC,NCHUNKS_VAL,iphase)
+
+      if(INCLUDE_CENTRAL_CUBE) then
+          if(iphase > 7 .and. iphase_CC <= 4) &
+            call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+                   npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+                   receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
+                   ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,accel_inner_core,NDIM,iphase_CC)
+      endif
+
+    endif
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempx2l = 0._CUSTOM_REAL
+          tempx3l = 0._CUSTOM_REAL
+
+          tempy1l = 0._CUSTOM_REAL
+          tempy2l = 0._CUSTOM_REAL
+          tempy3l = 0._CUSTOM_REAL
+
+          tempz1l = 0._CUSTOM_REAL
+          tempz2l = 0._CUSTOM_REAL
+          tempz3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            hp1 = hprime_xx(i,l)
+            iglob = ibool(l,j,k,ispec)
+            tempx1l = tempx1l + displ_crust_mantle(1,iglob)*hp1
+            tempy1l = tempy1l + displ_crust_mantle(2,iglob)*hp1
+            tempz1l = tempz1l + displ_crust_mantle(3,iglob)*hp1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+            hp2 = hprime_yy(j,l)
+            iglob = ibool(i,l,k,ispec)
+            tempx2l = tempx2l + displ_crust_mantle(1,iglob)*hp2
+            tempy2l = tempy2l + displ_crust_mantle(2,iglob)*hp2
+            tempz2l = tempz2l + displ_crust_mantle(3,iglob)*hp2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+            hp3 = hprime_zz(k,l)
+            iglob = ibool(i,j,l,ispec)
+            tempx3l = tempx3l + displ_crust_mantle(1,iglob)*hp3
+            tempy3l = tempy3l + displ_crust_mantle(2,iglob)*hp3
+            tempz3l = tempz3l + displ_crust_mantle(3,iglob)*hp3
+          enddo
+
+!         get derivatives of ux, uy and uz with respect to x, y and z
+
+          xixl = xix(i,j,k,ispec)
+          xiyl = xiy(i,j,k,ispec)
+          xizl = xiz(i,j,k,ispec)
+          etaxl = etax(i,j,k,ispec)
+          etayl = etay(i,j,k,ispec)
+          etazl = etaz(i,j,k,ispec)
+          gammaxl = gammax(i,j,k,ispec)
+          gammayl = gammay(i,j,k,ispec)
+          gammazl = gammaz(i,j,k,ispec)
+
+! compute the jacobian
+          jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+                        - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+                        + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+          duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+          duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+          duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+          duydxl = xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l
+          duydyl = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l
+          duydzl = xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l
+
+          duzdxl = xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l
+          duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l
+          duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l
+
+! precompute some sums to save CPU time
+          duxdxl_plus_duydyl = duxdxl + duydyl
+          duxdxl_plus_duzdzl = duxdxl + duzdzl
+          duydyl_plus_duzdzl = duydyl + duzdzl
+          duxdyl_plus_duydxl = duxdyl + duydxl
+          duzdxl_plus_duxdzl = duzdxl + duxdzl
+          duzdyl_plus_duydzl = duzdyl + duydzl
+
+! compute deviatoric strain
+          if (COMPUTE_AND_STORE_STRAIN) then
+            if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
+              ispec_strain = 1
+            else
+              ispec_strain = ispec
+            endif
+
+!ZN beware, here the expression differs from the strain used in memory variable equation (6) in D. Komatitsch and J. Tromp 1999,
+!ZN here Brian Savage uses the engineering strain, which is epsilon = 1/2*(grad U + (grad U)^T),
+!ZN where U is the displacement vector and grad the gradient operator, i.e. there is a 1/2 factor difference between the two.
+!ZN Both expressions are fine, but we need to keep in mind that if we put the 1/2 factor here then we need to remove it
+!ZN from the expression in which we use the strain later in the code.
+            templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+            epsilondev_loc(1,i,j,k) = duxdxl - templ
+            epsilondev_loc(2,i,j,k) = duydyl - templ
+            epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
+            epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
+            epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+          endif
+
+          ! precompute terms for attenuation if needed
+          if(ATTENUATION_VAL) then
+            one_minus_sum_beta_use = one_minus_sum_beta(i,j,k,ispec)
+            minus_sum_beta =  one_minus_sum_beta_use - 1.0
+          endif
+
+        !
+        ! compute either isotropic or anisotropic elements
+        !
+
+          if(ANISOTROPIC_3D_MANTLE_VAL) then
+
+            c11 = c11store(i,j,k,ispec)
+            c12 = c12store(i,j,k,ispec)
+            c13 = c13store(i,j,k,ispec)
+            c14 = c14store(i,j,k,ispec)
+            c15 = c15store(i,j,k,ispec)
+            c16 = c16store(i,j,k,ispec)
+            c22 = c22store(i,j,k,ispec)
+            c23 = c23store(i,j,k,ispec)
+            c24 = c24store(i,j,k,ispec)
+            c25 = c25store(i,j,k,ispec)
+            c26 = c26store(i,j,k,ispec)
+            c33 = c33store(i,j,k,ispec)
+            c34 = c34store(i,j,k,ispec)
+            c35 = c35store(i,j,k,ispec)
+            c36 = c36store(i,j,k,ispec)
+            c44 = c44store(i,j,k,ispec)
+            c45 = c45store(i,j,k,ispec)
+            c46 = c46store(i,j,k,ispec)
+            c55 = c55store(i,j,k,ispec)
+            c56 = c56store(i,j,k,ispec)
+            c66 = c66store(i,j,k,ispec)
+
+            if(ATTENUATION_VAL) then
+              mul = c44
+              c11 = c11 + FOUR_THIRDS * minus_sum_beta * mul
+              c12 = c12 - TWO_THIRDS * minus_sum_beta * mul
+              c13 = c13 - TWO_THIRDS * minus_sum_beta * mul
+              c22 = c22 + FOUR_THIRDS * minus_sum_beta * mul
+              c23 = c23 - TWO_THIRDS * minus_sum_beta * mul
+              c33 = c33 + FOUR_THIRDS * minus_sum_beta * mul
+              c44 = c44 + minus_sum_beta * mul
+              c55 = c55 + minus_sum_beta * mul
+              c66 = c66 + minus_sum_beta * mul
+            endif
+
+            sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
+                       c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
+
+            sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
+                       c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
+
+            sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
+                       c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
+
+            sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
+                       c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
+
+            sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
+                       c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
+
+            sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
+                       c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
+
+          else
+
+        ! do not use transverse isotropy except if element is between d220 and Moho
+!            if(.not. (TRANSVERSE_ISOTROPY_VAL .and. (idoubling(ispec)==IFLAG_220_80 .or. idoubling(ispec)==IFLAG_80_MOHO))) then
+            if( .not. ispec_is_tiso(ispec) ) then
+        ! layer with no transverse isotropy, use kappav and muv
+              kappal = kappavstore(i,j,k,ispec)
+              mul = muvstore(i,j,k,ispec)
+
+        ! use unrelaxed parameters if attenuation
+              if(ATTENUATION_VAL) mul = mul * one_minus_sum_beta_use
+
+              lambdalplus2mul = kappal + FOUR_THIRDS * mul
+              lambdal = lambdalplus2mul - 2.*mul
+
+        ! compute stress sigma
+
+              sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
+              sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
+              sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
+
+              sigma_xy = mul*duxdyl_plus_duydxl
+              sigma_xz = mul*duzdxl_plus_duxdzl
+              sigma_yz = mul*duzdyl_plus_duydzl
+
+            else
+
+        ! use Kappa and mu from transversely isotropic model
+              kappavl = kappavstore(i,j,k,ispec)
+              muvl = muvstore(i,j,k,ispec)
+
+              kappahl = kappahstore(i,j,k,ispec)
+              muhl = muhstore(i,j,k,ispec)
+
+        ! use unrelaxed parameters if attenuation
+        ! eta does not need to be shifted since it is a ratio
+              if(ATTENUATION_VAL) then
+                muvl = muvl * one_minus_sum_beta_use
+                muhl = muhl * one_minus_sum_beta_use
+              endif
+
+              rhovpvsq = kappavl + FOUR_THIRDS * muvl  !!! that is C
+              rhovphsq = kappahl + FOUR_THIRDS * muhl  !!! that is A
+
+              rhovsvsq = muvl  !!! that is L
+              rhovshsq = muhl  !!! that is N
+
+              eta_aniso = eta_anisostore(i,j,k,ispec)  !!! that is  F / (A - 2 L)
+
+        ! use mesh coordinates to get theta and phi
+        ! ystore and zstore contain theta and phi
+
+              iglob = ibool(i,j,k,ispec)
+              theta = ystore(iglob)
+              phi = zstore(iglob)
+
+              costheta = cos(theta)
+              sintheta = sin(theta)
+              cosphi = cos(phi)
+              sinphi = sin(phi)
+
+              costhetasq = costheta * costheta
+              sinthetasq = sintheta * sintheta
+              cosphisq = cosphi * cosphi
+              sinphisq = sinphi * sinphi
+
+              costhetafour = costhetasq * costhetasq
+              sinthetafour = sinthetasq * sinthetasq
+              cosphifour = cosphisq * cosphisq
+              sinphifour = sinphisq * sinphisq
+
+              costwotheta = cos(2.*theta)
+              sintwotheta = sin(2.*theta)
+              costwophi = cos(2.*phi)
+              sintwophi = sin(2.*phi)
+
+              cosfourtheta = cos(4.*theta)
+              cosfourphi = cos(4.*phi)
+
+              costwothetasq = costwotheta * costwotheta
+
+              costwophisq = costwophi * costwophi
+              sintwophisq = sintwophi * sintwophi
+
+              etaminone = eta_aniso - 1.
+              twoetaminone = 2. * eta_aniso - 1.
+
+        ! precompute some products to reduce the CPU time
+
+              two_eta_aniso = 2.*eta_aniso
+              four_eta_aniso = 4.*eta_aniso
+              six_eta_aniso = 6.*eta_aniso
+
+              two_rhovpvsq = 2.*rhovpvsq
+              two_rhovphsq = 2.*rhovphsq
+              two_rhovsvsq = 2.*rhovsvsq
+              two_rhovshsq = 2.*rhovshsq
+
+              four_rhovpvsq = 4.*rhovpvsq
+              four_rhovphsq = 4.*rhovphsq
+              four_rhovsvsq = 4.*rhovsvsq
+              four_rhovshsq = 4.*rhovshsq
+
+        ! the 21 anisotropic coefficients computed using Mathematica
+
+             c11 = rhovphsq*sinphifour + 2.*cosphisq*sinphisq* &
+               (rhovphsq*costhetasq + (eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)* &
+                  sinthetasq) + cosphifour* &
+               (rhovphsq*costhetafour + 2.*(eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)* &
+                  costhetasq*sinthetasq + rhovpvsq*sinthetafour)
+
+             c12 = ((rhovphsq - two_rhovshsq)*(3. + cosfourphi)*costhetasq)/4. - &
+              four_rhovshsq*cosphisq*costhetasq*sinphisq + &
+              (rhovphsq*(11. + 4.*costwotheta + cosfourtheta)*sintwophisq)/32. + &
+              eta_aniso*(rhovphsq - two_rhovsvsq)*(cosphifour + &
+                 2.*cosphisq*costhetasq*sinphisq + sinphifour)*sinthetasq + &
+              rhovpvsq*cosphisq*sinphisq*sinthetafour - &
+              rhovsvsq*sintwophisq*sinthetafour
+
+             c13 = (cosphisq*(rhovphsq + six_eta_aniso*rhovphsq + rhovpvsq - four_rhovsvsq - &
+                   12.*eta_aniso*rhovsvsq + (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - &
+                      four_eta_aniso*rhovsvsq)*cosfourtheta))/8. + &
+              sinphisq*(eta_aniso*(rhovphsq - two_rhovsvsq)*costhetasq + &
+                 (rhovphsq - two_rhovshsq)*sinthetasq)
+
+             c14 = costheta*sinphi*((cosphisq* &
+                   (-rhovphsq + rhovpvsq + four_rhovshsq - four_rhovsvsq + &
+                     (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq - &
+                        four_eta_aniso*rhovsvsq)*costwotheta))/2. + &
+                (etaminone*rhovphsq + 2.*(rhovshsq - eta_aniso*rhovsvsq))*sinphisq)* sintheta
+
+             c15 = cosphi*costheta*((cosphisq* (-rhovphsq + rhovpvsq + &
+                     (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)* &
+                      costwotheta))/2. + etaminone*(rhovphsq - two_rhovsvsq)*sinphisq)*sintheta
+
+             c16 = (cosphi*sinphi*(cosphisq* (-rhovphsq + rhovpvsq + &
+                     (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq - &
+                        four_eta_aniso*rhovsvsq)*costwotheta) + &
+                  2.*etaminone*(rhovphsq - two_rhovsvsq)*sinphisq)*sinthetasq)/2.
+
+             c22 = rhovphsq*cosphifour + 2.*cosphisq*sinphisq* &
+               (rhovphsq*costhetasq + (eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)* &
+                  sinthetasq) + sinphifour* &
+               (rhovphsq*costhetafour + 2.*(eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)* &
+                  costhetasq*sinthetasq + rhovpvsq*sinthetafour)
+
+             c23 = ((rhovphsq + six_eta_aniso*rhovphsq + rhovpvsq - four_rhovsvsq - 12.*eta_aniso*rhovsvsq + &
+                   (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)* &
+                    cosfourtheta)*sinphisq)/8. + &
+              cosphisq*(eta_aniso*(rhovphsq - two_rhovsvsq)*costhetasq + &
+                 (rhovphsq - two_rhovshsq)*sinthetasq)
+
+             c24 = costheta*sinphi*(etaminone*(rhovphsq - two_rhovsvsq)*cosphisq + &
+                ((-rhovphsq + rhovpvsq + (twoetaminone*rhovphsq - rhovpvsq + &
+                        four_rhovsvsq - four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)/2.)*sintheta
+
+             c25 = cosphi*costheta*((etaminone*rhovphsq + 2.*(rhovshsq - eta_aniso*rhovsvsq))* &
+                 cosphisq + ((-rhovphsq + rhovpvsq + four_rhovshsq - four_rhovsvsq + &
+                     (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq - &
+                        four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)/2.)*sintheta
+
+             c26 = (cosphi*sinphi*(2.*etaminone*(rhovphsq - two_rhovsvsq)*cosphisq + &
+                  (-rhovphsq + rhovpvsq + (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq - &
+                        four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)*sinthetasq)/2.
+
+             c33 = rhovpvsq*costhetafour + 2.*(eta_aniso*(rhovphsq - two_rhovsvsq) + two_rhovsvsq)* &
+               costhetasq*sinthetasq + rhovphsq*sinthetafour
+
+             c34 = -((rhovphsq - rhovpvsq + (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq &
+                       - four_eta_aniso*rhovsvsq)*costwotheta)*sinphi*sintwotheta)/4.
+
+             c35 = -(cosphi*(rhovphsq - rhovpvsq + &
+                   (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)* &
+                    costwotheta)*sintwotheta)/4.
+
+             c36 = -((rhovphsq - rhovpvsq - four_rhovshsq + four_rhovsvsq + &
+                   (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)* &
+                    costwotheta)*sintwophi*sinthetasq)/4.
+
+             c44 = cosphisq*(rhovsvsq*costhetasq + rhovshsq*sinthetasq) + &
+              sinphisq*(rhovsvsq*costwothetasq + &
+                 (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq + four_eta_aniso*rhovsvsq)*costhetasq* sinthetasq)
+
+             c45 = ((rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq - two_rhovshsq - two_rhovsvsq + &
+                  four_eta_aniso*rhovsvsq + (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq + &
+                     4.*etaminone*rhovsvsq)*costwotheta)*sintwophi*sinthetasq)/4.
+
+             c46 = -(cosphi*costheta*((rhovshsq - rhovsvsq)*cosphisq - &
+                  ((rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq - two_rhovshsq - two_rhovsvsq + &
+                       four_eta_aniso*rhovsvsq + (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + &
+                          four_rhovsvsq - four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)/2.)* sintheta)
+
+             c55 = sinphisq*(rhovsvsq*costhetasq + rhovshsq*sinthetasq) + &
+              cosphisq*(rhovsvsq*costwothetasq + &
+                 (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq + four_eta_aniso*rhovsvsq)*costhetasq* sinthetasq)
+
+             c56 = costheta*sinphi*((cosphisq* &
+                   (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq - two_rhovshsq - two_rhovsvsq + &
+                     four_eta_aniso*rhovsvsq + (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + &
+                        four_rhovsvsq - four_eta_aniso*rhovsvsq)*costwotheta))/2. + &
+                (-rhovshsq + rhovsvsq)*sinphisq)*sintheta
+
+             c66 = rhovshsq*costwophisq*costhetasq - &
+              2.*(rhovphsq - two_rhovshsq)*cosphisq*costhetasq*sinphisq + &
+              (rhovphsq*(11. + 4.*costwotheta + cosfourtheta)*sintwophisq)/32. - &
+              (rhovsvsq*(-6. - 2.*cosfourphi + cos(4.*phi - 2.*theta) - 2.*costwotheta + &
+                   cos(2.*(2.*phi + theta)))*sinthetasq)/8. + &
+              rhovpvsq*cosphisq*sinphisq*sinthetafour - &
+              (eta_aniso*(rhovphsq - two_rhovsvsq)*sintwophisq*sinthetafour)/2.
+
+        ! general expression of stress tensor for full Cijkl with 21 coefficients
+
+             sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
+                       c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
+
+             sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
+                       c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
+
+             sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
+                       c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
+
+             sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
+                       c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
+
+             sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
+                       c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
+
+             sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
+                       c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
+
+            endif
+
+          endif   ! end of test whether isotropic or anisotropic element
+
+        ! subtract memory variables if attenuation
+          if(ATTENUATION_VAL .and. ( PARTIAL_PHYS_DISPERSION_ONLY .eqv. .false. ) ) then
+            do i_SLS = 1,N_SLS
+              R_xx_val = R_memory(1,i_SLS,i,j,k,ispec)
+              R_yy_val = R_memory(2,i_SLS,i,j,k,ispec)
+              sigma_xx = sigma_xx - R_xx_val
+              sigma_yy = sigma_yy - R_yy_val
+              sigma_zz = sigma_zz + R_xx_val + R_yy_val
+              sigma_xy = sigma_xy - R_memory(3,i_SLS,i,j,k,ispec)
+              sigma_xz = sigma_xz - R_memory(4,i_SLS,i,j,k,ispec)
+              sigma_yz = sigma_yz - R_memory(5,i_SLS,i,j,k,ispec)
+            enddo
+          endif
+
+        ! define symmetric components of sigma for gravity
+          sigma_yx = sigma_xy
+          sigma_zx = sigma_xz
+          sigma_zy = sigma_yz
+
+        ! compute non-symmetric terms for gravity
+          if(GRAVITY_VAL) then
+
+        ! use mesh coordinates to get theta and phi
+        ! x y and z contain r theta and phi
+
+            iglob = ibool(i,j,k,ispec)
+            radius = dble(xstore(iglob))
+            theta = ystore(iglob)
+            phi = zstore(iglob)
+
+            cos_theta = dcos(dble(theta))
+            sin_theta = dsin(dble(theta))
+            cos_phi = dcos(dble(phi))
+            sin_phi = dsin(dble(phi))
+
+        ! get g, rho and dg/dr=dg
+        ! spherical components of the gravitational acceleration
+        ! for efficiency replace with lookup table every 100 m in radial direction
+            int_radius = nint(radius * R_EARTH_KM * 10.d0)
+            minus_g = minus_gravity_table(int_radius)
+            minus_dg = minus_deriv_gravity_table(int_radius)
+            rho = density_table(int_radius)
+
+        ! Cartesian components of the gravitational acceleration
+            gxl = minus_g*sin_theta*cos_phi
+            gyl = minus_g*sin_theta*sin_phi
+            gzl = minus_g*cos_theta
+
+        ! Cartesian components of gradient of gravitational acceleration
+        ! obtained from spherical components
+
+            minus_g_over_radius = minus_g / radius
+            minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
+
+            cos_theta_sq = cos_theta**2
+            sin_theta_sq = sin_theta**2
+            cos_phi_sq = cos_phi**2
+            sin_phi_sq = sin_phi**2
+
+            Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq
+            Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq
+            Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq
+            Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq
+            Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
+            Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
+
+            iglob = ibool(i,j,k,ispec)
+
+        ! distinguish between single and double precision for reals
+            if(CUSTOM_REAL == SIZE_REAL) then
+
+        ! get displacement and multiply by density to compute G tensor
+              sx_l = rho * dble(displ_crust_mantle(1,iglob))
+              sy_l = rho * dble(displ_crust_mantle(2,iglob))
+              sz_l = rho * dble(displ_crust_mantle(3,iglob))
+
+        ! compute G tensor from s . g and add to sigma (not symmetric)
+              sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
+              sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
+              sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
+
+              sigma_xy = sigma_xy - sngl(sx_l * gyl)
+              sigma_yx = sigma_yx - sngl(sy_l * gxl)
+
+              sigma_xz = sigma_xz - sngl(sx_l * gzl)
+              sigma_zx = sigma_zx - sngl(sz_l * gxl)
+
+              sigma_yz = sigma_yz - sngl(sy_l * gzl)
+              sigma_zy = sigma_zy - sngl(sz_l * gyl)
+
+        ! precompute vector
+              factor = dble(jacobianl) * wgll_cube(i,j,k)
+              rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+              rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+              rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+
+            else
+
+        ! get displacement and multiply by density to compute G tensor
+              sx_l = rho * displ_crust_mantle(1,iglob)
+              sy_l = rho * displ_crust_mantle(2,iglob)
+              sz_l = rho * displ_crust_mantle(3,iglob)
+
+        ! compute G tensor from s . g and add to sigma (not symmetric)
+              sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+              sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+              sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
+
+              sigma_xy = sigma_xy - sx_l * gyl
+              sigma_yx = sigma_yx - sy_l * gxl
+
+              sigma_xz = sigma_xz - sx_l * gzl
+              sigma_zx = sigma_zx - sz_l * gxl
+
+              sigma_yz = sigma_yz - sy_l * gzl
+              sigma_zy = sigma_zy - sz_l * gyl
+
+        ! precompute vector
+              factor = jacobianl * wgll_cube(i,j,k)
+              rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+              rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+              rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+
+            endif
+
+          endif  ! end of section with gravity terms
+
+        ! form dot product with test vector, non-symmetric form
+        tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
+        tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
+        tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
+
+        tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
+        tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
+        tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
+
+        tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
+        tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
+        tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
+
+        enddo ! NGLLX
+      enddo ! NGLLY
+    enddo ! NGLLZ
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempy1l = 0._CUSTOM_REAL
+          tempz1l = 0._CUSTOM_REAL
+
+          tempx2l = 0._CUSTOM_REAL
+          tempy2l = 0._CUSTOM_REAL
+          tempz2l = 0._CUSTOM_REAL
+
+          tempx3l = 0._CUSTOM_REAL
+          tempy3l = 0._CUSTOM_REAL
+          tempz3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            fac1 = hprimewgll_xx(l,i)
+            tempx1l = tempx1l + tempx1(l,j,k)*fac1
+            tempy1l = tempy1l + tempy1(l,j,k)*fac1
+            tempz1l = tempz1l + tempz1(l,j,k)*fac1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+            fac2 = hprimewgll_yy(l,j)
+            tempx2l = tempx2l + tempx2(i,l,k)*fac2
+            tempy2l = tempy2l + tempy2(i,l,k)*fac2
+            tempz2l = tempz2l + tempz2(i,l,k)*fac2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+            fac3 = hprimewgll_zz(l,k)
+            tempx3l = tempx3l + tempx3(i,j,l)*fac3
+            tempy3l = tempy3l + tempy3(i,j,l)*fac3
+            tempz3l = tempz3l + tempz3(i,j,l)*fac3
+          enddo
+
+          fac1 = wgllwgll_yz(j,k)
+          fac2 = wgllwgll_xz(i,k)
+          fac3 = wgllwgll_xy(i,j)
+
+          sum_terms(1,i,j,k) = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l)
+          sum_terms(2,i,j,k) = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l)
+          sum_terms(3,i,j,k) = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l)
+
+          if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
+
+        enddo ! NGLLX
+      enddo ! NGLLY
+    enddo ! NGLLZ
+
+! sum contributions from each element to the global mesh and add gravity terms
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+          iglob = ibool(i,j,k,ispec)
+          accel_crust_mantle(1,iglob) = accel_crust_mantle(1,iglob) + sum_terms(1,i,j,k)
+          accel_crust_mantle(2,iglob) = accel_crust_mantle(2,iglob) + sum_terms(2,i,j,k)
+          accel_crust_mantle(3,iglob) = accel_crust_mantle(3,iglob) + sum_terms(3,i,j,k)
+        enddo
+      enddo
+    enddo
+
+! update memory variables based upon the Runge-Kutta scheme
+! convention for attenuation
+! term in xx = 1
+! term in yy = 2
+! term in xy = 3
+! term in xz = 4
+! term in yz = 5
+! term in zz not computed since zero trace
+! This is because we only implement Q_\mu attenuation and not Q_\kappa.
+! Note that this does *NOT* imply that there is no attenuation for P waves
+! because for Q_\kappa = infinity one gets (see for instance Dahlen and Tromp (1998)
+! equation (9.59) page 350): Q_\alpha = Q_\mu * 3 * (V_p/V_s)^2 / 4
+! therefore Q_\alpha remains finite, i.e. P waves are attenuated; for instance for V_p / V_s = sqrt(3)
+! we get Q_\alpha = (9 / 4) * Q_\mu = 2.25 * Q_\mu
+
+    if(ATTENUATION_VAL .and. ( PARTIAL_PHYS_DISPERSION_ONLY .eqv. .false. )) then
+
+       call compute_element_strain_att_noDev(ispec,NGLOB_CRUST_MANTLE,NSPEC_CRUST_MANTLE,displ_crust_mantle,veloc_crust_mantle,&
+                                             deltat,hprime_xx,hprime_yy,hprime_zz,ibool,&
+                                             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz,epsilondev_loc_nplus1)
+
+! use Runge-Kutta scheme to march in time
+      do i_SLS = 1,N_SLS
+        do i_memory = 1,5
+
+! get coefficients for that standard linear solid
+! IMPROVE we use mu_v here even if there is some anisotropy
+! IMPROVE we should probably use an average value instead
+
+          ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
+          factor_common_c44_muv = factor_common(i_SLS,:,:,:,ispec)
+          if(ANISOTROPIC_3D_MANTLE_VAL) then
+            factor_common_c44_muv = factor_common_c44_muv * c44store(:,:,:,ispec)
+          else
+            factor_common_c44_muv = factor_common_c44_muv * muvstore(:,:,:,ispec)
+          endif
+
+!          R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * &
+!                    R_memory(i_memory,i_SLS,:,:,:,ispec) + &
+!                    factor_common_c44_muv * &
+!                    (betaval(i_SLS) * epsilondev_loc(i_memory,:,:,:) + &
+!                    gammaval(i_SLS) * epsilondev_loc_nplus1(i_memory,:,:,:))
+
+         if(USE_LDDRK)then
+           R_memory_lddrk(i_memory,i_SLS,:,:,:,ispec) = ALPHA_LDDRK(istage) * R_memory_lddrk(i_memory,i_SLS,:,:,:,ispec) + &
+                    deltat * (factor_common_c44_muv(:,:,:)*epsilondev_loc(i_memory,:,:,:) - &
+                              R_memory(i_memory,i_SLS,:,:,:,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)))
+           R_memory(i_memory,i_SLS,:,:,:,ispec) = R_memory(i_memory,i_SLS,:,:,:,ispec) + &
+                                               BETA_LDDRK(istage) * R_memory_lddrk(i_memory,i_SLS,:,:,:,ispec)
+         else
+           R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_memory(i_memory,i_SLS,:,:,:,ispec) &
+                + factor_common_c44_muv(:,:,:) &
+                * (betaval(i_SLS) * epsilondev_loc(i_memory,:,:,:) + gammaval(i_SLS) * epsilondev_loc_nplus1(i_memory,:,:,:))
+         endif
+
+        enddo
+      enddo
+
+    endif
+
+  enddo   ! spectral element loop NSPEC_CRUST_MANTLE
+
+  end subroutine compute_forces_crust_mantle
+

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_noDev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_noDev.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_inner_core_noDev.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,702 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine compute_forces_inner_core(minus_gravity_table,density_table,minus_deriv_gravity_table, &
+          displ_inner_core,accel_inner_core,xstore,ystore,zstore, &
+          xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+            is_on_a_slice_edge_inner_core,icall, &
+            accel_crust_mantle,ibool_inner_core,idoubling_inner_core, &
+            myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+            npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+            iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+            iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+            npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+            iboolfaces_inner_core,iboolcorner_inner_core, &
+            iprocfrom_faces,iprocto_faces, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+            buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector,iphase, &
+            nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+            npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+            receiver_cube_from_slices,ibelm_bottom_inner_core,NSPEC2D_BOTTOM_INNER_CORE,INCLUDE_CENTRAL_CUBE,iphase_CC, &
+          hprime_xx,hprime_yy,hprime_zz, &
+          hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+          wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+          kappavstore,muvstore,ibool,idoubling, &
+          c11store,c33store,c12store,c13store,c44store,R_memory,one_minus_sum_beta,deltat,veloc_inner_core,&
+          alphaval,betaval,gammaval,factor_common, &
+          vx,vy,vz,vnspec,PARTIAL_PHYS_DISPERSION_ONLY,&
+          istage,R_memory_lddrk,tau_sigma_CUSTOM_REAL)
+
+  implicit none
+
+  include "constants.h"
+
+! include values created by the mesher
+! done for performance only using static allocation to allow for loop unrolling
+  include "OUTPUT_FILES/values_from_mesher.h"
+
+  real(kind=CUSTOM_REAL) deltat
+
+! displacement and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: displ_inner_core,accel_inner_core,veloc_inner_core
+
+! for attenuation
+! memory variables R_ij are stored at the local rather than global level
+! to allow for optimization of cache access by compiler
+  integer i_SLS,i_memory
+  real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
+
+! variable lengths for factor_common and one_minus_sum_beta
+  integer vx, vy, vz, vnspec
+
+  real(kind=CUSTOM_REAL), dimension(vx, vy, vz, vnspec) :: one_minus_sum_beta
+
+  real(kind=CUSTOM_REAL), dimension(N_SLS, vx, vy, vz, vnspec) :: factor_common
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
+  real(kind=CUSTOM_REAL), dimension(NGLLX, NGLLY, NGLLZ) :: factor_common_use
+
+  real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: R_memory
+  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc,epsilondev_loc_nplus1
+  logical :: PARTIAL_PHYS_DISPERSION_ONLY
+
+! array with the local to global mapping per slice
+  integer, dimension(NSPEC_INNER_CORE) :: idoubling
+
+! arrays with mesh parameters per slice
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: xix,xiy,xiz, &
+                      etax,etay,etaz,gammax,gammay,gammaz
+
+! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: kappavstore,muvstore
+
+!  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
+!    c11store,c33store,c12store,c13store,c44store
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_IC) :: &
+    c11store,c33store,c12store,c13store,c44store
+
+  integer ispec,iglob,ispec_strain
+  integer i,j,k,l
+
+  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
+
+  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+
+  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
+
+  real(kind=CUSTOM_REAL) hp1,hp2,hp3
+  real(kind=CUSTOM_REAL) fac1,fac2,fac3
+  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
+  real(kind=CUSTOM_REAL) kappal
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
+
+  real(kind=CUSTOM_REAL) minus_sum_beta
+  real(kind=CUSTOM_REAL) c11l,c33l,c12l,c13l,c44l
+
+  real(kind=CUSTOM_REAL) tempx1l,tempx2l,tempx3l
+  real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
+  real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
+
+! for gravity
+  integer int_radius
+  real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
+  double precision radius,rho,minus_g,minus_dg
+  double precision minus_g_over_radius,minus_dg_plus_g_over_radius
+  double precision cos_theta,sin_theta,cos_phi,sin_phi
+  double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
+  double precision theta,phi,factor,gxl,gyl,gzl,sx_l,sy_l,sz_l
+  double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
+  double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table,density_table,minus_deriv_gravity_table
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
+  double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
+  real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: xstore,ystore,zstore
+
+! this for non blocking MPI
+  integer :: iphase,icall
+
+  integer :: computed_elements
+
+  logical, dimension(NSPEC_INNER_CORE) :: is_on_a_slice_edge_inner_core
+
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: accel_crust_mantle
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
+
+  integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
+
+  integer :: ichunk,iproc_xi,iproc_eta,myrank
+
+  integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX_CM) :: iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX_CM) :: iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle
+
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX_IC) :: iboolleft_xi_inner_core,iboolright_xi_inner_core
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX_IC) :: iboolleft_eta_inner_core,iboolright_eta_inner_core
+
+  integer npoin2D_faces_crust_mantle(NUMFACES_SHARED)
+  integer npoin2D_faces_inner_core(NUMFACES_SHARED)
+
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+       npoin2D_xi_inner_core,npoin2D_eta_inner_core
+
+! communication pattern for faces between chunks
+  integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
+
+! communication pattern for corners between chunks
+  integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+  integer, dimension(NGLOB1D_RADIAL_CM,NUMCORNERS_SHARED) :: iboolcorner_crust_mantle
+  integer, dimension(NGLOB1D_RADIAL_IC,NUMCORNERS_SHARED) :: iboolcorner_inner_core
+
+  integer, dimension(NGLOB2DMAX_XY_CM_VAL,NUMFACES_SHARED) :: iboolfaces_crust_mantle
+  integer, dimension(NGLOB2DMAX_XY_IC_VAL,NUMFACES_SHARED) :: iboolfaces_inner_core
+
+  integer :: npoin2D_max_all_CM_IC
+  real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC) :: buffer_send_faces,buffer_received_faces
+
+! size of buffers is the sum of two sizes because we handle two regions in the same MPI call
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB1D_RADIAL_CM + NGLOB1D_RADIAL_IC) :: &
+     buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector
+
+! for matching with central cube in inner core
+  integer nb_msgs_theor_in_cube, npoin2D_cube_from_slices,iphase_CC
+  integer, dimension(nb_msgs_theor_in_cube) :: sender_from_slices_to_cube
+  double precision, dimension(npoin2D_cube_from_slices,NDIM) :: buffer_slices
+  double precision, dimension(npoin2D_cube_from_slices,NDIM,nb_msgs_theor_in_cube) :: buffer_all_cube_from_slices
+  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices):: ibool_central_cube
+  integer receiver_cube_from_slices
+  logical :: INCLUDE_CENTRAL_CUBE
+
+! local to global mapping
+  integer NSPEC2D_BOTTOM_INNER_CORE
+  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+  real(kind=CUSTOM_REAL) templ
+
+!for LDDRK
+  integer :: istage
+  real(kind=CUSTOM_REAL), dimension(5,N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: R_memory_lddrk
+  real(kind=CUSTOM_REAL),dimension(N_SLS) :: tau_sigma_CUSTOM_REAL
+
+! ****************************************************
+!   big loop over all spectral elements in the solid
+! ****************************************************
+
+  computed_elements = 0
+
+  do ispec = 1,NSPEC_INNER_CORE
+
+! hide communications by computing the edges first
+    if((icall == 2 .and. is_on_a_slice_edge_inner_core(ispec)) .or. &
+       (icall == 1 .and. .not. is_on_a_slice_edge_inner_core(ispec))) cycle
+
+! exclude fictitious elements in central cube
+    if(idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then
+
+! process the non-blocking communications every ELEMENTS_NONBLOCKING_CM_IC elements
+    computed_elements = computed_elements + 1
+    if (icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_CM_IC) == 0) then
+
+      if(iphase <= 7) call assemble_MPI_vector(myrank,accel_crust_mantle,accel_inner_core, &
+            iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi_crust_mantle,iboolright_xi_crust_mantle,iboolleft_eta_crust_mantle,iboolright_eta_crust_mantle, &
+            npoin2D_faces_crust_mantle,npoin2D_xi_crust_mantle,npoin2D_eta_crust_mantle, &
+            iboolfaces_crust_mantle,iboolcorner_crust_mantle, &
+            iboolleft_xi_inner_core,iboolright_xi_inner_core,iboolleft_eta_inner_core,iboolright_eta_inner_core, &
+            npoin2D_faces_inner_core,npoin2D_xi_inner_core,npoin2D_eta_inner_core, &
+            iboolfaces_inner_core,iboolcorner_inner_core, &
+            iprocfrom_faces,iprocto_faces, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+            buffer_send_chunkcorn_vector,buffer_recv_chunkcorn_vector, &
+            NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
+            NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_CM, &
+            NGLOB1D_RADIAL_IC,NCHUNKS_VAL,iphase)
+
+      if(INCLUDE_CENTRAL_CUBE) then
+          if(iphase > 7 .and. iphase_CC <= 4) &
+            call assemble_MPI_central_cube(ichunk,nb_msgs_theor_in_cube,sender_from_slices_to_cube, &
+                   npoin2D_cube_from_slices,buffer_all_cube_from_slices,buffer_slices,ibool_central_cube, &
+                   receiver_cube_from_slices,ibool_inner_core,idoubling_inner_core, &
+                   ibelm_bottom_inner_core,NSPEC2D_BOTTOM_IC,accel_inner_core,NDIM,iphase_CC)
+      endif
+
+    endif
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempx2l = 0._CUSTOM_REAL
+          tempx3l = 0._CUSTOM_REAL
+
+          tempy1l = 0._CUSTOM_REAL
+          tempy2l = 0._CUSTOM_REAL
+          tempy3l = 0._CUSTOM_REAL
+
+          tempz1l = 0._CUSTOM_REAL
+          tempz2l = 0._CUSTOM_REAL
+          tempz3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            hp1 = hprime_xx(i,l)
+            iglob = ibool(l,j,k,ispec)
+            tempx1l = tempx1l + displ_inner_core(1,iglob)*hp1
+            tempy1l = tempy1l + displ_inner_core(2,iglob)*hp1
+            tempz1l = tempz1l + displ_inner_core(3,iglob)*hp1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+            hp2 = hprime_yy(j,l)
+            iglob = ibool(i,l,k,ispec)
+            tempx2l = tempx2l + displ_inner_core(1,iglob)*hp2
+            tempy2l = tempy2l + displ_inner_core(2,iglob)*hp2
+            tempz2l = tempz2l + displ_inner_core(3,iglob)*hp2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+            hp3 = hprime_zz(k,l)
+            iglob = ibool(i,j,l,ispec)
+            tempx3l = tempx3l + displ_inner_core(1,iglob)*hp3
+            tempy3l = tempy3l + displ_inner_core(2,iglob)*hp3
+            tempz3l = tempz3l + displ_inner_core(3,iglob)*hp3
+          enddo
+
+!         get derivatives of ux, uy and uz with respect to x, y and z
+
+          xixl = xix(i,j,k,ispec)
+          xiyl = xiy(i,j,k,ispec)
+          xizl = xiz(i,j,k,ispec)
+          etaxl = etax(i,j,k,ispec)
+          etayl = etay(i,j,k,ispec)
+          etazl = etaz(i,j,k,ispec)
+          gammaxl = gammax(i,j,k,ispec)
+          gammayl = gammay(i,j,k,ispec)
+          gammazl = gammaz(i,j,k,ispec)
+
+! compute the jacobian
+          jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+                        - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+                        + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+          duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+          duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+          duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+          duydxl = xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l
+          duydyl = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l
+          duydzl = xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l
+
+          duzdxl = xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l
+          duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l
+          duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l
+
+! precompute some sums to save CPU time
+          duxdxl_plus_duydyl = duxdxl + duydyl
+          duxdxl_plus_duzdzl = duxdxl + duzdzl
+          duydyl_plus_duzdzl = duydyl + duzdzl
+          duxdyl_plus_duydxl = duxdyl + duydxl
+          duzdxl_plus_duxdzl = duzdxl + duxdzl
+          duzdyl_plus_duydzl = duzdyl + duydzl
+
+! compute deviatoric strain
+          if (COMPUTE_AND_STORE_STRAIN) then
+            if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
+              ispec_strain = 1
+            else
+              ispec_strain = ispec
+            endif
+!ZN beware, here the expression differs from the strain used in memory variable equation (6) in D. Komatitsch and J. Tromp 1999,
+!ZN here Brian Savage uses the engineering strain, which is epsilon = 1/2*(grad U + (grad U)^T),
+!ZN where U is the displacement vector and grad the gradient operator, i.e. there is a 1/2 factor difference between the two.
+!ZN Both expressions are fine, but we need to keep in mind that if we put the 1/2 factor here then we need to remove it
+!ZN from the expression in which we use the strain later in the code.
+            templ = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+            epsilondev_loc(1,i,j,k) = duxdxl - templ
+            epsilondev_loc(2,i,j,k) = duydyl - templ
+            epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
+            epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
+            epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+          endif
+
+          if(ATTENUATION_VAL) then
+            minus_sum_beta =  one_minus_sum_beta(i,j,k,ispec) - 1.0
+          endif
+
+          if(ANISOTROPIC_INNER_CORE_VAL) then
+
+! elastic tensor for hexagonal symmetry in reduced notation:
+!
+!      c11 c12 c13  0   0        0
+!      c12 c11 c13  0   0        0
+!      c13 c13 c33  0   0        0
+!       0   0   0  c44  0        0
+!       0   0   0   0  c44       0
+!       0   0   0   0   0  (c11-c12)/2
+!
+!       in terms of the A, C, L, N and F of Love (1927):
+!
+!       c11 = A
+!       c12 = A-2N
+!       c13 = F
+!       c33 = C
+!       c44 = L
+
+            c11l = c11store(i,j,k,ispec)
+            c12l = c12store(i,j,k,ispec)
+            c13l = c13store(i,j,k,ispec)
+            c33l = c33store(i,j,k,ispec)
+            c44l = c44store(i,j,k,ispec)
+
+! use unrelaxed parameters if attenuation
+            if(ATTENUATION_VAL) then
+              mul = muvstore(i,j,k,ispec)
+              c11l = c11l + FOUR_THIRDS * minus_sum_beta * mul
+              c12l = c12l - TWO_THIRDS * minus_sum_beta * mul
+              c13l = c13l - TWO_THIRDS * minus_sum_beta * mul
+              c33l = c33l + FOUR_THIRDS * minus_sum_beta * mul
+              c44l = c44l + minus_sum_beta * mul
+            endif
+
+            sigma_xx = c11l*duxdxl + c12l*duydyl + c13l*duzdzl
+            sigma_yy = c12l*duxdxl + c11l*duydyl + c13l*duzdzl
+            sigma_zz = c13l*duxdxl + c13l*duydyl + c33l*duzdzl
+            sigma_xy = 0.5*(c11l-c12l)*duxdyl_plus_duydxl
+            sigma_xz = c44l*duzdxl_plus_duxdzl
+            sigma_yz = c44l*duzdyl_plus_duydzl
+          else
+
+! inner core layer with no anisotropy, use kappav and muv for instance
+            kappal = kappavstore(i,j,k,ispec)
+            mul = muvstore(i,j,k,ispec)
+
+            ! use unrelaxed parameters if attenuation
+            if(ATTENUATION_VAL) then
+              mul = mul * one_minus_sum_beta(i,j,k,ispec)
+            endif
+
+            lambdalplus2mul = kappal + FOUR_THIRDS * mul
+            lambdal = lambdalplus2mul - 2.*mul
+
+! compute stress sigma
+
+            sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
+            sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
+            sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
+
+            sigma_xy = mul*duxdyl_plus_duydxl
+            sigma_xz = mul*duzdxl_plus_duxdzl
+            sigma_yz = mul*duzdyl_plus_duydzl
+
+          endif
+
+! subtract memory variables if attenuation
+          if(ATTENUATION_VAL .and. ( PARTIAL_PHYS_DISPERSION_ONLY .eqv. .false. ) ) then
+            do i_SLS = 1,N_SLS
+              R_xx_val = R_memory(1,i_SLS,i,j,k,ispec)
+              R_yy_val = R_memory(2,i_SLS,i,j,k,ispec)
+              sigma_xx = sigma_xx - R_xx_val
+              sigma_yy = sigma_yy - R_yy_val
+              sigma_zz = sigma_zz + R_xx_val + R_yy_val
+              sigma_xy = sigma_xy - R_memory(3,i_SLS,i,j,k,ispec)
+              sigma_xz = sigma_xz - R_memory(4,i_SLS,i,j,k,ispec)
+              sigma_yz = sigma_yz - R_memory(5,i_SLS,i,j,k,ispec)
+            enddo
+          endif
+
+! define symmetric components of sigma for gravity
+          sigma_yx = sigma_xy
+          sigma_zx = sigma_xz
+          sigma_zy = sigma_yz
+
+! compute non-symmetric terms for gravity
+          if(GRAVITY_VAL) then
+
+! use mesh coordinates to get theta and phi
+! x y and z contain r theta and phi
+
+            iglob = ibool(i,j,k,ispec)
+            radius = dble(xstore(iglob))
+            theta = dble(ystore(iglob))
+            phi = dble(zstore(iglob))
+
+! make sure radius is never zero even for points at center of cube
+! because we later divide by radius
+            if(radius < 100.d0 / R_EARTH) radius = 100.d0 / R_EARTH
+
+            cos_theta = dcos(theta)
+            sin_theta = dsin(theta)
+            cos_phi = dcos(phi)
+            sin_phi = dsin(phi)
+
+! get g, rho and dg/dr=dg
+! spherical components of the gravitational acceleration
+! for efficiency replace with lookup table every 100 m in radial direction
+! make sure we never use zero for point exactly at the center of the Earth
+            int_radius = max(1,nint(radius * R_EARTH_KM * 10.d0))
+            minus_g = minus_gravity_table(int_radius)
+            minus_dg = minus_deriv_gravity_table(int_radius)
+            rho = density_table(int_radius)
+
+! Cartesian components of the gravitational acceleration
+            gxl = minus_g*sin_theta*cos_phi
+            gyl = minus_g*sin_theta*sin_phi
+            gzl = minus_g*cos_theta
+
+! Cartesian components of gradient of gravitational acceleration
+! obtained from spherical components
+
+            minus_g_over_radius = minus_g / radius
+            minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
+
+            cos_theta_sq = cos_theta**2
+            sin_theta_sq = sin_theta**2
+            cos_phi_sq = cos_phi**2
+            sin_phi_sq = sin_phi**2
+
+            Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq
+            Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq
+            Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq
+            Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq
+            Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
+            Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
+
+            iglob = ibool(i,j,k,ispec)
+
+! distinguish between single and double precision for reals
+            if(CUSTOM_REAL == SIZE_REAL) then
+
+! get displacement and multiply by density to compute G tensor
+              sx_l = rho * dble(displ_inner_core(1,iglob))
+              sy_l = rho * dble(displ_inner_core(2,iglob))
+              sz_l = rho * dble(displ_inner_core(3,iglob))
+
+! compute G tensor from s . g and add to sigma (not symmetric)
+              sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
+              sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
+              sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
+
+              sigma_xy = sigma_xy - sngl(sx_l * gyl)
+              sigma_yx = sigma_yx - sngl(sy_l * gxl)
+
+              sigma_xz = sigma_xz - sngl(sx_l * gzl)
+              sigma_zx = sigma_zx - sngl(sz_l * gxl)
+
+              sigma_yz = sigma_yz - sngl(sy_l * gzl)
+              sigma_zy = sigma_zy - sngl(sz_l * gyl)
+
+! precompute vector
+              factor = dble(jacobianl) * wgll_cube(i,j,k)
+              rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+              rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+              rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+
+            else
+
+! get displacement and multiply by density to compute G tensor
+              sx_l = rho * displ_inner_core(1,iglob)
+              sy_l = rho * displ_inner_core(2,iglob)
+              sz_l = rho * displ_inner_core(3,iglob)
+
+! compute G tensor from s . g and add to sigma (not symmetric)
+              sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+              sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+              sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
+
+              sigma_xy = sigma_xy - sx_l * gyl
+              sigma_yx = sigma_yx - sy_l * gxl
+
+              sigma_xz = sigma_xz - sx_l * gzl
+              sigma_zx = sigma_zx - sz_l * gxl
+
+              sigma_yz = sigma_yz - sy_l * gzl
+              sigma_zy = sigma_zy - sz_l * gyl
+
+! precompute vector
+              factor = jacobianl * wgll_cube(i,j,k)
+              rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+              rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+              rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+
+            endif
+
+          endif  ! end of section with gravity terms
+
+! form dot product with test vector, non-symmetric form
+        tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl) ! this goes to accel_x
+        tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl) ! this goes to accel_y
+        tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl) ! this goes to accel_z
+
+        tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl) ! this goes to accel_x
+        tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl) ! this goes to accel_y
+        tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl) ! this goes to accel_z
+
+        tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl) ! this goes to accel_x
+        tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl) ! this goes to accel_y
+        tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl) ! this goes to accel_z
+
+        enddo
+      enddo
+    enddo
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempy1l = 0._CUSTOM_REAL
+          tempz1l = 0._CUSTOM_REAL
+
+          tempx2l = 0._CUSTOM_REAL
+          tempy2l = 0._CUSTOM_REAL
+          tempz2l = 0._CUSTOM_REAL
+
+          tempx3l = 0._CUSTOM_REAL
+          tempy3l = 0._CUSTOM_REAL
+          tempz3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            fac1 = hprimewgll_xx(l,i)
+            tempx1l = tempx1l + tempx1(l,j,k)*fac1
+            tempy1l = tempy1l + tempy1(l,j,k)*fac1
+            tempz1l = tempz1l + tempz1(l,j,k)*fac1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+            fac2 = hprimewgll_yy(l,j)
+            tempx2l = tempx2l + tempx2(i,l,k)*fac2
+            tempy2l = tempy2l + tempy2(i,l,k)*fac2
+            tempz2l = tempz2l + tempz2(i,l,k)*fac2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+            fac3 = hprimewgll_zz(l,k)
+            tempx3l = tempx3l + tempx3(i,j,l)*fac3
+            tempy3l = tempy3l + tempy3(i,j,l)*fac3
+            tempz3l = tempz3l + tempz3(i,j,l)*fac3
+          enddo
+
+          fac1 = wgllwgll_yz(j,k)
+          fac2 = wgllwgll_xz(i,k)
+          fac3 = wgllwgll_xy(i,j)
+
+          sum_terms(1,i,j,k) = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l)
+          sum_terms(2,i,j,k) = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l)
+          sum_terms(3,i,j,k) = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l)
+
+          if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
+
+        enddo
+      enddo
+    enddo
+
+! sum contributions from each element to the global mesh (gravity terms are already included in sum_terms above)
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+          iglob = ibool(i,j,k,ispec)
+          accel_inner_core(:,iglob) = accel_inner_core(:,iglob) + sum_terms(:,i,j,k)
+        enddo
+      enddo
+    enddo
+
+! use Runge-Kutta scheme to march memory variables in time
+! convention for attenuation
+! term in xx = 1
+! term in yy = 2
+! term in xy = 3
+! term in xz = 4
+! term in yz = 5
+! term in zz not computed since zero trace
+! This is because we only implement Q_\mu attenuation and not Q_\kappa.
+! Note that this does *NOT* imply that there is no attenuation for P waves
+! because for Q_\kappa = infinity one gets (see for instance Dahlen and Tromp (1998)
+! equation (9.59) page 350): Q_\alpha = Q_\mu * 3 * (V_p/V_s)^2 / 4
+! therefore Q_\alpha is finite rather than infinite, i.e. P waves are attenuated as well; for instance for V_p / V_s = sqrt(3)
+! we get Q_\alpha = (9 / 4) * Q_\mu = 2.25 * Q_\mu
+
+    if(ATTENUATION_VAL .and. ( PARTIAL_PHYS_DISPERSION_ONLY .eqv. .false. )) then
+
+       call compute_element_strain_att_noDev(ispec,NGLOB_INNER_CORE,NSPEC_INNER_CORE,displ_inner_core,&
+                                             veloc_inner_core,deltat,hprime_xx,hprime_yy,hprime_zz,ibool,&
+                                             xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz,epsilondev_loc_nplus1)
+
+      do i_SLS = 1,N_SLS
+        factor_common_use = factor_common(i_SLS,:,:,:,ispec)
+!        do i_memory = 1,5
+!          R_memory(i_memory,i_SLS,:,:,:,ispec) = &
+!                  alphaval(i_SLS) * &
+!                  R_memory(i_memory,i_SLS,:,:,:,ispec) + muvstore(:,:,:,ispec) * &
+!                  factor_common_use * &
+!                  (betaval(i_SLS) * &
+!                  epsilondev_loc_nplus1(i_memory,:,:,:) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
+!        enddo
+        if(USE_LDDRK)then
+          do i_memory = 1,5
+            R_memory_lddrk(i_memory,i_SLS,:,:,:,ispec) = ALPHA_LDDRK(istage) * R_memory_lddrk(i_memory,i_SLS,:,:,:,ispec) + &
+                     deltat * (muvstore(:,:,:,ispec)*factor_common_use*epsilondev_loc(i_memory,:,:,:) - &
+                               R_memory(i_memory,i_SLS,:,:,:,ispec)*(1._CUSTOM_REAL/tau_sigma_CUSTOM_REAL(i_SLS)))
+            R_memory(i_memory,i_SLS,:,:,:,ispec) = R_memory(i_memory,i_SLS,:,:,:,ispec) + &
+                                               BETA_LDDRK(istage) * R_memory_lddrk(i_memory,i_SLS,:,:,:,ispec)
+          enddo
+        else
+          do i_memory = 1,5
+            R_memory(i_memory,i_SLS,:,:,:,ispec) = &
+                   alphaval(i_SLS) * &
+                   R_memory(i_memory,i_SLS,:,:,:,ispec) + muvstore(:,:,:,ispec) * &
+                   factor_common_use * &
+                   (betaval(i_SLS) * &
+                   epsilondev_loc_nplus1(i_memory,:,:,:) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
+          enddo
+        endif
+
+      enddo
+
+    endif
+
+  endif   ! end test to exclude fictitious elements in central cube
+
+  enddo ! spectral element loop
+
+  end subroutine compute_forces_inner_core
+

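[Editor's note] The gravity terms in the routine above rely on radial tables sampled every 100 m, indexed by nint(radius * R_EARTH_KM * 10.d0) with a floor of 1 so that a point exactly at the Earth's center never yields index 0. The standalone sketch below is illustrative only and is not part of the patch; it assumes the PREM value R_EARTH_KM = 6371.d0 (normally taken from constants.h) and shows the table index obtained for a point 10 km below the surface.

    ! editor's sketch of the 100 m radial lookup indexing, under the assumptions stated above
    program demo_gravity_table_index
      implicit none
      double precision, parameter :: R_EARTH_KM = 6371.d0
      double precision :: radius
      integer :: int_radius
      ! a point 10 km below the surface, in non-dimensional units (r / R_EARTH)
      radius = (R_EARTH_KM - 10.d0) / R_EARTH_KM
      ! one index per 100 m = 0.1 km; max(1,...) guards a point exactly at the center
      int_radius = max(1, nint(radius * R_EARTH_KM * 10.d0))
      print *, 'table index = ', int_radius   ! prints 63610
    end program demo_gravity_table_index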
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_noDev.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_noDev.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/compute_forces_outer_core_noDev.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,415 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine compute_forces_outer_core(time,deltat,two_omega_earth, &
+                          A_array_rotation,B_array_rotation, &
+                          d_ln_density_dr_table, &
+                          minus_rho_g_over_kappa_fluid,displfluid,accelfluid, &
+                          div_displfluid, &
+                          xstore,ystore,zstore, &
+                          xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+          is_on_a_slice_edge_outer_core, &
+          myrank,iproc_xi,iproc_eta,ichunk,addressing, &
+          iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
+          npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
+          iboolfaces_outer_core,iboolcorner_outer_core, &
+          iprocfrom_faces,iprocto_faces, &
+          iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+          buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+          buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar,iphase,icall, &
+                          hprime_xx,hprime_yy,hprime_zz, &
+                          hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+                          wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+                          ibool,MOVIE_VOLUME,&
+                          istage,A_array_rotation_lddrk,B_array_rotation_lddrk)
+
+  implicit none
+
+  include "constants.h"
+
+! include values created by the mesher
+! done for performance only using static allocation to allow for loop unrolling
+  include "OUTPUT_FILES/values_from_mesher.h"
+
+! displacement and acceleration
+  real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: displfluid,accelfluid
+
+! divergence of displacement
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: div_displfluid
+
+! arrays with mesh parameters per slice
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec_outer_core) :: ibool
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec_outer_core) :: xix,xiy,xiz, &
+                      etax,etay,etaz,gammax,gammay,gammaz
+
+! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+  double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
+
+  logical MOVIE_VOLUME
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
+
+! for gravity
+  integer int_radius
+  double precision radius,theta,phi,gxl,gyl,gzl
+  double precision cos_theta,sin_theta,cos_phi,sin_phi
+  double precision, dimension(NRAD_GRAVITY) :: minus_rho_g_over_kappa_fluid
+  double precision, dimension(NRAD_GRAVITY) :: d_ln_density_dr_table
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: gravity_term
+  real(kind=CUSTOM_REAL), dimension(nglob_outer_core) :: xstore,ystore,zstore
+
+! for the Euler scheme for rotation
+  real(kind=CUSTOM_REAL) time,deltat,two_omega_earth
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
+    A_array_rotation,B_array_rotation
+
+  real(kind=CUSTOM_REAL) two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rotation,B_rotation, &
+       ux_rotation,uy_rotation,dpotentialdx_with_rot,dpotentialdy_with_rot
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: source_euler_A,source_euler_B
+
+  integer ispec,iglob
+  integer i,j,k,l
+
+  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) dpotentialdxl,dpotentialdyl,dpotentialdzl
+  real(kind=CUSTOM_REAL) tempx1l,tempx2l,tempx3l,sum_terms
+
+  double precision grad_x_ln_rho,grad_y_ln_rho,grad_z_ln_rho
+
+! this for non blocking MPI
+  integer :: ichunk,iproc_xi,iproc_eta,myrank
+
+  integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX_OC) :: iboolleft_xi_outer_core,iboolright_xi_outer_core
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX_OC) :: iboolleft_eta_outer_core,iboolright_eta_outer_core
+
+  integer npoin2D_faces_outer_core(NUMFACES_SHARED)
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi_outer_core,npoin2D_eta_outer_core
+
+! communication pattern for faces between chunks
+  integer, dimension(NUMMSGS_FACES_VAL) :: iprocfrom_faces,iprocto_faces
+
+! communication pattern for corners between chunks
+  integer, dimension(NCORNERSCHUNKS_VAL) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+! indirect addressing for each message for faces and corners of the chunks
+! a given slice can belong to at most one corner and at most two faces
+  integer, dimension(NGLOB2DMAX_XY_OC_VAL,NUMFACES_SHARED) :: iboolfaces_outer_core
+
+! buffers for send and receive between faces of the slices and the chunks
+! we use the same buffers to assemble scalars and vectors because vectors are
+! always three times bigger and therefore scalars can use the first part
+! of the vector buffer in memory even if it has an additional index here
+  integer :: npoin2D_max_all_CM_IC
+  real(kind=CUSTOM_REAL), dimension(NDIM,npoin2D_max_all_CM_IC,NUMFACES_SHARED) :: buffer_send_faces,buffer_received_faces
+
+  integer, dimension(NGLOB1D_RADIAL_OC,NUMCORNERS_SHARED) :: iboolcorner_outer_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLOB1D_RADIAL_OC) :: buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar
+
+  logical, dimension(NSPEC_OUTER_CORE) :: is_on_a_slice_edge_outer_core
+
+  integer :: iphase,icall
+
+  integer :: computed_elements
+
+! for LDDRK
+  integer :: istage
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
+            A_array_rotation_lddrk,B_array_rotation_lddrk
+
+! ****************************************************
+!   big loop over all spectral elements in the fluid
+! ****************************************************
+  if(istage == 1) then
+    if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. icall == 1) div_displfluid(:,:,:,:) = 0._CUSTOM_REAL
+  endif
+
+  computed_elements = 0
+
+  do ispec = 1,NSPEC_OUTER_CORE
+
+! hide communications by computing the edges first
+    if((icall == 2 .and. is_on_a_slice_edge_outer_core(ispec)) .or. &
+       (icall == 1 .and. .not. is_on_a_slice_edge_outer_core(ispec))) cycle
+
+! process the non-blocking communications every ELEMENTS_NONBLOCKING elements
+    computed_elements = computed_elements + 1
+    if (icall == 2 .and. mod(computed_elements,ELEMENTS_NONBLOCKING_OC) == 0 .and. iphase <= 7) &
+      call assemble_MPI_scalar(myrank,accelfluid,NGLOB_OUTER_CORE, &
+            iproc_xi,iproc_eta,ichunk,addressing, &
+            iboolleft_xi_outer_core,iboolright_xi_outer_core,iboolleft_eta_outer_core,iboolright_eta_outer_core, &
+            npoin2D_faces_outer_core,npoin2D_xi_outer_core,npoin2D_eta_outer_core, &
+            iboolfaces_outer_core,iboolcorner_outer_core, &
+            iprocfrom_faces,iprocto_faces, &
+            iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+            buffer_send_faces,buffer_received_faces,npoin2D_max_all_CM_IC, &
+            buffer_send_chunkcorn_scalar,buffer_recv_chunkcorn_scalar, &
+            NUMMSGS_FACES_VAL,NCORNERSCHUNKS_VAL, &
+            NPROC_XI_VAL,NPROC_ETA_VAL,NGLOB1D_RADIAL_OC, &
+            NGLOB2DMAX_XMIN_XMAX_OC,NGLOB2DMAX_YMIN_YMAX_OC, &
+            NGLOB2DMAX_XY_OC_VAL,NCHUNKS_VAL,iphase)
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempx2l = 0._CUSTOM_REAL
+          tempx3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            !!! can merge these loops because NGLLX = NGLLY = NGLLZ
+            tempx1l = tempx1l + displfluid(ibool(l,j,k,ispec)) * hprime_xx(i,l)
+            tempx2l = tempx2l + displfluid(ibool(i,l,k,ispec)) * hprime_yy(j,l)
+            tempx3l = tempx3l + displfluid(ibool(i,j,l,ispec)) * hprime_zz(k,l)
+          enddo
+
+          ! get derivatives of velocity potential with respect to x, y and z
+          xixl = xix(i,j,k,ispec)
+          xiyl = xiy(i,j,k,ispec)
+          xizl = xiz(i,j,k,ispec)
+          etaxl = etax(i,j,k,ispec)
+          etayl = etay(i,j,k,ispec)
+          etazl = etaz(i,j,k,ispec)
+          gammaxl = gammax(i,j,k,ispec)
+          gammayl = gammay(i,j,k,ispec)
+          gammazl = gammaz(i,j,k,ispec)
+
+          ! compute the jacobian
+          jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+                        - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+                        + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+          dpotentialdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+          dpotentialdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+          dpotentialdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+          ! compute contribution of rotation and add to gradient of potential
+          ! this term has no Z component
+          if(ROTATION_VAL) then
+
+            ! store the source for the Euler scheme for A_rotation and B_rotation
+            two_omega_deltat = deltat * two_omega_earth
+
+            cos_two_omega_t = cos(two_omega_earth*time)
+            sin_two_omega_t = sin(two_omega_earth*time)
+
+            ! time step deltat of Euler scheme is included in the source
+            source_euler_A(i,j,k) = two_omega_deltat * (cos_two_omega_t * dpotentialdyl + sin_two_omega_t * dpotentialdxl)
+            source_euler_B(i,j,k) = two_omega_deltat * (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl)
+
+            A_rotation = A_array_rotation(i,j,k,ispec)
+            B_rotation = B_array_rotation(i,j,k,ispec)
+
+            ux_rotation =   A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t
+            uy_rotation = - A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t
+
+            dpotentialdx_with_rot = dpotentialdxl + ux_rotation
+            dpotentialdy_with_rot = dpotentialdyl + uy_rotation
+
+          else
+
+            dpotentialdx_with_rot = dpotentialdxl
+            dpotentialdy_with_rot = dpotentialdyl
+
+          endif  ! end of section with rotation
+
+          ! add (chi/rho)grad(rho) term in no gravity case
+          if(.not. GRAVITY_VAL) then
+            ! With regards to the non-gravitating case: we cannot set N^2 = 0 *and* let g = 0.
+            ! We can *either* assume N^2 = 0 but keep gravity g, *or* we can assume that gravity
+            ! is negligible to begin with, as in our GJI 2002a, in which case N does not arise.
+            ! We get:
+            !
+            ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
+            !
+            ! Then the displacement is
+            !
+            ! \bu = \bdel\chi+\chi\bdel\ln\rho = \rho^{-1}\bdel(\rho\chi)
+            !
+            ! and the pressure is
+            !
+            ! p = -\rho\ddot{\chi}
+            !
+            ! Thus in our 2002b GJI paper eqn (21) is wrong, and equation (41)
+            ! in our AGU monograph is incorrect; these equations should be replaced by
+            !
+            ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
+            !
+            ! Note that the fluid potential we use in GJI 2002a differs from the one used here:
+            !
+            ! \chi_GJI2002a = \rho\partial_t\chi
+            !
+            ! such that
+            !
+            ! \bv = \partial_t\bu=\rho^{-1}\bdel\chi_GJI2002a  (GJI 2002a eqn 20)
+            !
+            ! p = - \partial_t\chi_GJI2002a (GJI 2002a eqn 19)
+
+            ! use mesh coordinates to get theta and phi
+            ! x y z contain r theta phi
+            iglob = ibool(i,j,k,ispec)
+
+            radius = dble(xstore(iglob))
+            theta = dble(ystore(iglob))
+            phi = dble(zstore(iglob))
+
+            cos_theta = dcos(theta)
+            sin_theta = dsin(theta)
+            cos_phi = dcos(phi)
+            sin_phi = dsin(phi)
+
+            int_radius = nint(radius * R_EARTH_KM * 10.d0)
+
+            ! grad(rho)/rho in Cartesian components
+            grad_x_ln_rho = sin_theta * cos_phi * d_ln_density_dr_table(int_radius)
+            grad_y_ln_rho = sin_theta * sin_phi * d_ln_density_dr_table(int_radius)
+            grad_z_ln_rho = cos_theta * d_ln_density_dr_table(int_radius)
+
+            ! adding (chi/rho)grad(rho)
+            dpotentialdx_with_rot = dpotentialdx_with_rot + displfluid(iglob) * grad_x_ln_rho
+            dpotentialdy_with_rot = dpotentialdy_with_rot + displfluid(iglob) * grad_y_ln_rho
+            dpotentialdzl = dpotentialdzl + displfluid(iglob) * grad_z_ln_rho
+
+
+         else  ! if gravity is turned on
+
+            ! compute divergence of displacement
+            ! precompute and store gravity term
+
+            ! use mesh coordinates to get theta and phi
+            ! x y z contain r theta phi
+            iglob = ibool(i,j,k,ispec)
+
+            radius = dble(xstore(iglob))
+            theta = dble(ystore(iglob))
+            phi = dble(zstore(iglob))
+
+            cos_theta = dcos(theta)
+            sin_theta = dsin(theta)
+            cos_phi = dcos(phi)
+            sin_phi = dsin(phi)
+
+            ! get g, rho and dg/dr=dg
+            ! spherical components of the gravitational acceleration
+            ! for efficiency replace with lookup table every 100 m in radial direction
+            int_radius = nint(radius * R_EARTH_KM * 10.d0)
+
+            ! Cartesian components of the gravitational acceleration
+            ! integrate and multiply by rho / Kappa
+            gxl = sin_theta*cos_phi
+            gyl = sin_theta*sin_phi
+            gzl = cos_theta
+
+            ! distinguish between single and double precision for reals
+            if(CUSTOM_REAL == SIZE_REAL) then
+              gravity_term(i,j,k) = &
+                sngl(minus_rho_g_over_kappa_fluid(int_radius) * &
+                dble(jacobianl) * wgll_cube(i,j,k) * &
+               (dble(dpotentialdx_with_rot) * gxl + &
+                dble(dpotentialdy_with_rot) * gyl + dble(dpotentialdzl) * gzl))
+            else
+              gravity_term(i,j,k) = minus_rho_g_over_kappa_fluid(int_radius) * &
+                 jacobianl * wgll_cube(i,j,k) * (dpotentialdx_with_rot * gxl + &
+                 dpotentialdy_with_rot * gyl + dpotentialdzl * gzl)
+            endif
+
+            if(istage == 1) then
+              ! divergence of displacement field with gravity on
+              ! note: these calculations are only used when SIMULATION_TYPE == 1 and SAVE_FORWARD is set,
+              !          with MOVIE_VOLUME_TYPE == 4 chosen and MOVIE_VOLUME set to .true.;
+              !          in case of SIMULATION_TYPE == 3, this array gets overwritten by compute_kernels_outer_core()
+              if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. MOVIE_VOLUME ) then
+                div_displfluid(i,j,k,ispec) =  &
+                   minus_rho_g_over_kappa_fluid(int_radius) * (dpotentialdx_with_rot * gxl + &
+                   dpotentialdy_with_rot * gyl + dpotentialdzl * gzl)
+              endif
+            endif
+
+          endif
+
+          tempx1(i,j,k) = jacobianl*(xixl*dpotentialdx_with_rot + xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl)
+          tempx2(i,j,k) = jacobianl*(etaxl*dpotentialdx_with_rot + etayl*dpotentialdy_with_rot + etazl*dpotentialdzl)
+          tempx3(i,j,k) = jacobianl*(gammaxl*dpotentialdx_with_rot + gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl)
+
+        enddo
+      enddo
+    enddo
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempx2l = 0._CUSTOM_REAL
+          tempx3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            !!! can merge these loops because NGLLX = NGLLY = NGLLZ
+            tempx1l = tempx1l + tempx1(l,j,k) * hprimewgll_xx(l,i)
+            tempx2l = tempx2l + tempx2(i,l,k) * hprimewgll_yy(l,j)
+            tempx3l = tempx3l + tempx3(i,j,l) * hprimewgll_zz(l,k)
+          enddo
+
+          ! sum contributions from each element to the global mesh and add gravity term
+          sum_terms = - (wgllwgll_yz(j,k)*tempx1l + wgllwgll_xz(i,k)*tempx2l + wgllwgll_xy(i,j)*tempx3l)
+          if(GRAVITY_VAL) sum_terms = sum_terms + gravity_term(i,j,k)
+
+          accelfluid(ibool(i,j,k,ispec)) = accelfluid(ibool(i,j,k,ispec)) + sum_terms
+
+        enddo
+      enddo
+    enddo
+
+    ! update rotation term with Euler scheme
+    if(ROTATION_VAL) then
+      if(USE_LDDRK)then
+        ! use the source saved above
+        A_array_rotation_lddrk(:,:,:,ispec) = ALPHA_LDDRK(istage) * A_array_rotation_lddrk(:,:,:,ispec) + source_euler_A(:,:,:)
+        A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + BETA_LDDRK(istage) * A_array_rotation_lddrk(:,:,:,ispec)
+
+        B_array_rotation_lddrk(:,:,:,ispec) = ALPHA_LDDRK(istage) * B_array_rotation_lddrk(:,:,:,ispec) + source_euler_B(:,:,:)
+        B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + BETA_LDDRK(istage) * B_array_rotation_lddrk(:,:,:,ispec)
+      else
+        ! use the source saved above
+        A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + source_euler_A(:,:,:)
+        B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + source_euler_B(:,:,:)
+      endif
+    endif
+
+  enddo   ! spectral element loop
+
+  end subroutine compute_forces_outer_core
+

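[Editor's note] In the outer-core routine above the Coriolis correction is carried by two memory fields, A_array_rotation and B_array_rotation, updated with an explicit Euler step whose time step is folded into source_euler_A/B. The sketch below is a minimal scalar illustration of the non-LDDRK branch for a single GLL point; it is not taken from the patch, and the numerical values are arbitrary placeholders chosen only to make the program runnable.

    ! editor's sketch of the rotation (Coriolis) correction and its Euler update, assumptions as stated above
    program demo_rotation_euler
      implicit none
      double precision :: time, deltat, two_omega_earth
      double precision :: A_rot, B_rot, dpdx, dpdy
      double precision :: two_omega_deltat, c2wt, s2wt
      double precision :: source_A, source_B, ux_rot, uy_rot
      double precision :: dpdx_with_rot, dpdy_with_rot
      ! arbitrary illustrative values
      time = 100.d0; deltat = 0.1d0; two_omega_earth = 1.d-4
      A_rot = 0.d0; B_rot = 0.d0
      dpdx = 1.d0; dpdy = 2.d0
      ! rotation correction added to the horizontal gradient of the potential (no Z component)
      c2wt = cos(two_omega_earth*time)
      s2wt = sin(two_omega_earth*time)
      ux_rot =  A_rot*c2wt + B_rot*s2wt
      uy_rot = -A_rot*s2wt + B_rot*c2wt
      dpdx_with_rot = dpdx + ux_rot
      dpdy_with_rot = dpdy + uy_rot
      ! explicit Euler update of the rotation memory variables (time step included in the source)
      two_omega_deltat = deltat*two_omega_earth
      source_A = two_omega_deltat*(c2wt*dpdy + s2wt*dpdx)
      source_B = two_omega_deltat*(s2wt*dpdy - c2wt*dpdx)
      A_rot = A_rot + source_A
      B_rot = B_rot + source_B
      print *, dpdx_with_rot, dpdy_with_rot, A_rot, B_rot
    end program demo_rotation_euler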
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/create_central_cube_buffers.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/create_central_cube_buffers.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/create_central_cube_buffers.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,541 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!
+!--- create buffers to assemble with central cube
+!
+
+  subroutine create_central_cube_buffers(myrank,iproc_xi,iproc_eta,ichunk, &
+       NPROC_XI,NPROC_ETA,NCHUNKS,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+       NSPEC2DMAX_XMIN_XMAX_INNER_CORE,NSPEC2DMAX_YMIN_YMAX_INNER_CORE,NSPEC2D_BOTTOM_INNER_CORE, &
+       addressing,ibool_inner_core,idoubling_inner_core, &
+       xstore_inner_core,ystore_inner_core,zstore_inner_core, &
+       nspec2D_xmin_inner_core,nspec2D_xmax_inner_core,nspec2D_ymin_inner_core,nspec2D_ymax_inner_core, &
+       ibelm_xmin_inner_core,ibelm_xmax_inner_core,ibelm_ymin_inner_core,ibelm_ymax_inner_core,ibelm_bottom_inner_core, &
+       nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices, &
+       receiver_cube_from_slices,sender_from_slices_to_cube,ibool_central_cube, &
+       buffer_slices,buffer_slices2,buffer_all_cube_from_slices)
+
+  implicit none
+
+! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+
+  integer, intent(in) :: myrank,iproc_xi,iproc_eta,ichunk, &
+       NPROC_XI,NPROC_ETA,NCHUNKS,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+       NSPEC2DMAX_XMIN_XMAX_INNER_CORE,NSPEC2DMAX_YMIN_YMAX_INNER_CORE,NSPEC2D_BOTTOM_INNER_CORE
+
+! for addressing of the slices
+  integer, dimension(NCHUNKS,0:NPROC_XI-1,0:NPROC_ETA-1), intent(in) :: addressing
+
+! mesh parameters
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE), intent(in) :: ibool_inner_core
+
+! local to global mapping
+  integer, dimension(NSPEC_INNER_CORE), intent(in) :: idoubling_inner_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE), intent(in) :: xstore_inner_core,ystore_inner_core,zstore_inner_core
+
+! boundary parameters locator
+  integer, intent(in) :: nspec2D_xmin_inner_core,nspec2D_xmax_inner_core,nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
+  integer, dimension(NSPEC2DMAX_XMIN_XMAX_INNER_CORE), intent(in) :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
+  integer, dimension(NSPEC2DMAX_YMIN_YMAX_INNER_CORE), intent(in) :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
+  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE), intent(in) :: ibelm_bottom_inner_core
+
+  integer, intent(in) :: nb_msgs_theor_in_cube,non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices
+
+! for matching with central cube in inner core
+  integer, intent(out) :: receiver_cube_from_slices
+
+  integer, dimension(non_zero_nb_msgs_theor_in_cube), intent(out) :: sender_from_slices_to_cube
+  integer, dimension(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices), intent(out) :: ibool_central_cube
+  double precision, dimension(npoin2D_cube_from_slices,NDIM), intent(out) :: buffer_slices,buffer_slices2
+  double precision, dimension(non_zero_nb_msgs_theor_in_cube,npoin2D_cube_from_slices,NDIM), intent(out) :: &
+        buffer_all_cube_from_slices
+
+! local variables below
+  integer i,j,k,ispec,ispec2D,iglob,ier
+  integer sender,receiver,imsg,ipoin,iproc_xi_loop
+
+  double precision x_target,y_target,z_target
+  double precision x_current,y_current,z_current
+
+! MPI status of messages to be received
+  integer msg_status(MPI_STATUS_SIZE)
+
+!--- processor to send information to in cube from slices
+
+! four vertical sides first
+  if(ichunk == CHUNK_AC) then
+    if (iproc_xi < floor(NPROC_XI/2.d0)) then
+      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1,iproc_eta)
+    else
+      receiver_cube_from_slices = addressing(CHUNK_AB,0,iproc_eta)
+    endif
+  else if(ichunk == CHUNK_BC) then
+    if (iproc_xi < floor(NPROC_XI/2.d0)) then
+      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1-iproc_eta,NPROC_ETA-1)
+    else
+      receiver_cube_from_slices = addressing(CHUNK_AB,iproc_eta,NPROC_ETA-1)
+    endif
+  else if(ichunk == CHUNK_AC_ANTIPODE) then
+    if (iproc_xi <= ceiling((NPROC_XI/2.d0)-1)) then
+      receiver_cube_from_slices = addressing(CHUNK_AB,NPROC_XI-1,iproc_eta)
+    else
+      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,0,iproc_eta)
+    endif
+  else if(ichunk == CHUNK_BC_ANTIPODE) then
+    if (iproc_xi < floor(NPROC_XI/2.d0)) then
+      receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,iproc_eta,0)
+    else
+      receiver_cube_from_slices = addressing(CHUNK_AB,NPROC_XI-1-iproc_eta,0)
+    endif
+! bottom of cube, direct correspondence but with inverted xi axis
+  else if(ichunk == CHUNK_AB_ANTIPODE) then
+    receiver_cube_from_slices = addressing(CHUNK_AB,NPROC_XI-1-iproc_xi,iproc_eta)
+  else if(ichunk == CHUNK_AB) then
+    receiver_cube_from_slices = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1-iproc_xi,iproc_eta)
+  endif
+
+
+!--- list of processors to receive information from in cube
+
+! only for slices in central cube
+  if(ichunk == CHUNK_AB) then
+
+! initialize index of sender
+    imsg = 0
+
+! define sender for xi = xi_min edge
+    if(iproc_xi == 0) then
+      do iproc_xi_loop = floor(NPROC_XI/2.d0),NPROC_XI-1
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC,iproc_xi_loop,iproc_eta)
+      enddo
+    endif
+
+! define sender for xi = xi_max edge
+    if(iproc_xi == NPROC_XI-1) then
+      do iproc_xi_loop = 0, floor((NPROC_XI-1)/2.d0)
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC_ANTIPODE,iproc_xi_loop,iproc_eta)
+      enddo
+    endif
+
+! define sender for eta = eta_min edge
+    if(iproc_eta == 0) then
+      do iproc_xi_loop = floor(NPROC_XI/2.d0),NPROC_XI-1
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC_ANTIPODE,iproc_xi_loop,NPROC_ETA-1-iproc_xi)
+      enddo
+    endif
+
+! define sender for eta = eta_max edge
+    if(iproc_eta == NPROC_ETA-1) then
+      do iproc_xi_loop = floor(NPROC_XI/2.d0),NPROC_XI-1
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC,iproc_xi_loop,iproc_xi)
+      enddo
+    endif
+
+! define sender for bottom edge
+! bottom of cube, direct correspondence but with inverted xi axis
+    imsg = imsg + 1
+    sender_from_slices_to_cube(imsg) = addressing(CHUNK_AB_ANTIPODE,NPROC_XI-1-iproc_xi,iproc_eta)
+
+! check that total number of faces found is correct
+   if(imsg /= nb_msgs_theor_in_cube) call exit_MPI(myrank,'wrong number of faces found for central cube')
+
+  else if(ichunk == CHUNK_AB_ANTIPODE) then
+
+! initialize index of sender
+    imsg = 0
+
+! define sender for xi = xi_min edge
+    if(iproc_xi == 0) then
+      do iproc_xi_loop = ceiling(NPROC_XI/2.d0),NPROC_XI-1
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC_ANTIPODE,iproc_xi_loop,iproc_eta)
+      enddo
+    endif
+
+! define sender for xi = xi_max edge
+    if(iproc_xi == NPROC_XI-1) then
+      do iproc_xi_loop = 0, floor((NPROC_XI/2.d0)-1.d0)
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_AC,iproc_xi_loop,iproc_eta)
+      enddo
+    endif
+
+! define sender for eta = eta_min edge
+    if(iproc_eta == 0) then
+      do iproc_xi_loop = 0, floor((NPROC_XI/2.d0)-1.d0)
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC_ANTIPODE,iproc_xi_loop,iproc_xi)
+      enddo
+    endif
+
+! define sender for eta = eta_max edge
+    if(iproc_eta == NPROC_ETA-1) then
+      do iproc_xi_loop = 0, floor((NPROC_XI/2.d0)-1.d0)
+        imsg = imsg + 1
+        sender_from_slices_to_cube(imsg) = addressing(CHUNK_BC,iproc_xi_loop,NPROC_ETA-1-iproc_xi)
+      enddo
+    endif
+
+! define sender for bottom edge
+! bottom of cube, direct correspondence but with inverted xi axis
+    imsg = imsg + 1
+    sender_from_slices_to_cube(imsg) = addressing(CHUNK_AB,NPROC_XI-1-iproc_xi,iproc_eta)
+
+! check that total number of faces found is correct
+   if(imsg /= nb_msgs_theor_in_cube) call exit_MPI(myrank,'wrong number of faces found for central cube')
+
+  else
+
+! dummy value in slices
+    sender_from_slices_to_cube(1) = -1
+
+  endif
+
+
+! on chunks AB & AB ANTIPODE, receive all the messages from the slices except the bottom one
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+    do imsg = 1,nb_msgs_theor_in_cube-1
+
+! receive buffers from slices
+    sender = sender_from_slices_to_cube(imsg)
+    call MPI_RECV(buffer_slices, &
+              NDIM*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+              itag,MPI_COMM_WORLD,msg_status,ier)
+
+! copy buffer in 2D array for each slice
+    buffer_all_cube_from_slices(imsg,:,:) = buffer_slices(:,:)
+
+    enddo
+  endif
+
+! send info to central cube from all the slices except those in CHUNK_AB & CHUNK_AB_ANTIPODE
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE ) then
+
+! for bottom elements in contact with central cube from the slices side
+    ipoin = 0
+    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+
+      ispec = ibelm_bottom_inner_core(ispec2D)
+
+! only for DOFs exactly on surface of central cube (bottom of these elements)
+      k = 1
+      do j = 1,NGLLY
+        do i = 1,NGLLX
+          ipoin = ipoin + 1
+          iglob = ibool_inner_core(i,j,k,ispec)
+          buffer_slices(ipoin,1) = dble(xstore_inner_core(iglob))
+          buffer_slices(ipoin,2) = dble(ystore_inner_core(iglob))
+          buffer_slices(ipoin,3) = dble(zstore_inner_core(iglob))
+        enddo
+      enddo
+    enddo
+
+! send buffer to central cube
+    receiver = receiver_cube_from_slices
+    call MPI_SEND(buffer_slices,NDIM*npoin2D_cube_from_slices, &
+              MPI_DOUBLE_PRECISION,receiver,itag,MPI_COMM_WORLD,ier)
+
+ endif  ! end sending info to central cube
+
+
+! exchange of their bottom faces between chunks AB and AB_ANTIPODE
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+    ipoin = 0
+    do ispec = NSPEC_INNER_CORE, 1, -1
+      if (idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE) then
+        k = 1
+        do j = 1,NGLLY
+          do i = 1,NGLLX
+            ipoin = ipoin + 1
+            iglob = ibool_inner_core(i,j,k,ispec)
+            buffer_slices(ipoin,1) = dble(xstore_inner_core(iglob))
+            buffer_slices(ipoin,2) = dble(ystore_inner_core(iglob))
+            buffer_slices(ipoin,3) = dble(zstore_inner_core(iglob))
+          enddo
+        enddo
+      endif
+    enddo
+    if (ipoin /= npoin2D_cube_from_slices) call exit_MPI(myrank,'wrong number of points found for bottom CC AB or !AB')
+
+    sender = sender_from_slices_to_cube(nb_msgs_theor_in_cube)
+
+    call MPI_SENDRECV(buffer_slices,NDIM*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,receiver_cube_from_slices, &
+        itag,buffer_slices2,NDIM*npoin2D_cube_from_slices,MPI_DOUBLE_PRECISION,sender, &
+        itag,MPI_COMM_WORLD,msg_status,ier)
+
+    buffer_all_cube_from_slices(nb_msgs_theor_in_cube,:,:) = buffer_slices2(:,:)
+
+  endif
+
+!--- now we need to find the points received and create indirect addressing
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+   do imsg = 1,nb_msgs_theor_in_cube
+
+   do ipoin = 1,npoin2D_cube_from_slices
+
+     x_target = buffer_all_cube_from_slices(imsg,ipoin,1)
+     y_target = buffer_all_cube_from_slices(imsg,ipoin,2)
+     z_target = buffer_all_cube_from_slices(imsg,ipoin,3)
+
+! x = x_min
+  do ispec2D = 1,nspec2D_xmin_inner_core
+
+      ispec = ibelm_xmin_inner_core(ispec2D)
+
+! do not loop on elements outside of the central cube
+     if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
+        idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
+        idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
+
+     i = 1
+     do k = 1,NGLLZ
+       do j = 1,NGLLY
+
+         iglob = ibool_inner_core(i,j,k,ispec)
+         x_current = dble(xstore_inner_core(iglob))
+         y_current = dble(ystore_inner_core(iglob))
+         z_current = dble(zstore_inner_core(iglob))
+
+! look for matching point
+         if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+           ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
+           goto 100
+         endif
+
+       enddo
+     enddo
+
+   enddo
+
+! x = x_max
+  do ispec2D = 1,nspec2D_xmax_inner_core
+
+      ispec = ibelm_xmax_inner_core(ispec2D)
+
+! do not loop on elements outside of the central cube
+     if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
+        idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
+        idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
+
+     i = NGLLX
+     do k = 1,NGLLZ
+       do j = 1,NGLLY
+
+         iglob = ibool_inner_core(i,j,k,ispec)
+         x_current = dble(xstore_inner_core(iglob))
+         y_current = dble(ystore_inner_core(iglob))
+         z_current = dble(zstore_inner_core(iglob))
+
+! look for matching point
+         if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+           ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
+           goto 100
+         endif
+
+       enddo
+     enddo
+
+   enddo
+
+! y = y_min
+  do ispec2D = 1,nspec2D_ymin_inner_core
+
+      ispec = ibelm_ymin_inner_core(ispec2D)
+
+! do not loop on elements outside of the central cube
+     if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
+        idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
+        idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
+
+     j = 1
+     do k = 1,NGLLZ
+       do i = 1,NGLLX
+
+         iglob = ibool_inner_core(i,j,k,ispec)
+         x_current = dble(xstore_inner_core(iglob))
+         y_current = dble(ystore_inner_core(iglob))
+         z_current = dble(zstore_inner_core(iglob))
+
+! look for matching point
+         if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+           ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
+           goto 100
+         endif
+
+       enddo
+     enddo
+
+   enddo
+
+! y = y_max
+  do ispec2D = 1,nspec2D_ymax_inner_core
+
+      ispec = ibelm_ymax_inner_core(ispec2D)
+
+! do not loop on elements outside of the central cube
+     if(idoubling_inner_core(ispec) /= IFLAG_MIDDLE_CENTRAL_CUBE .and. &
+        idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE .and. &
+        idoubling_inner_core(ispec) /= IFLAG_TOP_CENTRAL_CUBE) cycle
+
+     j = NGLLY
+     do k = 1,NGLLZ
+       do i = 1,NGLLX
+
+         iglob = ibool_inner_core(i,j,k,ispec)
+         x_current = dble(xstore_inner_core(iglob))
+         y_current = dble(ystore_inner_core(iglob))
+         z_current = dble(zstore_inner_core(iglob))
+
+! look for matching point
+         if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+           ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
+           goto 100
+         endif
+
+       enddo
+     enddo
+
+   enddo
+
+! bottom of cube
+  do ispec = 1,NSPEC_INNER_CORE
+
+! loop on elements at the bottom of the cube only
+     if(idoubling_inner_core(ispec) /= IFLAG_BOTTOM_CENTRAL_CUBE) cycle
+
+     k = 1
+     do j = 1,NGLLY
+       do i = 1,NGLLX
+
+         iglob = ibool_inner_core(i,j,k,ispec)
+         x_current = dble(xstore_inner_core(iglob))
+         y_current = dble(ystore_inner_core(iglob))
+         z_current = dble(zstore_inner_core(iglob))
+
+! look for matching point
+         if(dsqrt((x_current-x_target)**2 + (y_current-y_target)**2 + (z_current-z_target)**2) < SMALLVALTOL) then
+           ibool_central_cube(imsg,ipoin) = ibool_inner_core(i,j,k,ispec)
+           goto 100
+         endif
+
+       enddo
+     enddo
+
+   enddo
+
+! a matching point must be found in all cases; if we fall through to this line, none was found: abort
+  call exit_MPI(myrank,'point never found in central cube')
+
+ 100 continue
+
+   enddo
+   enddo
+   endif
+
+  end subroutine create_central_cube_buffers
+
+!
+!----------------------------------
+!
+
+  subroutine comp_central_cube_buffer_size(iproc_xi,iproc_eta,ichunk,NPROC_XI,NPROC_ETA,NSPEC2D_BOTTOM_INNER_CORE, &
+                nb_msgs_theor_in_cube,npoin2D_cube_from_slices)
+
+!--- compute number of messages to expect in cube as well as their size
+!--- take into account vertical sides and bottom side
+
+  implicit none
+
+  include "constants.h"
+
+  integer, intent(in) :: iproc_xi,iproc_eta,ichunk,NPROC_XI,NPROC_ETA,NSPEC2D_BOTTOM_INNER_CORE
+
+  integer, intent(out) :: nb_msgs_theor_in_cube,npoin2D_cube_from_slices
+
+! only for slices in central cube
+  if(ichunk == CHUNK_AB) then
+    if(NPROC_XI == 1) then
+! five sides if only one processor in cube
+      nb_msgs_theor_in_cube = 5
+    else
+! case of a corner
+      if((iproc_xi == 0 .or. iproc_xi == NPROC_XI-1).and. &
+         (iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1)) then
+! slices on both "vertical" faces plus one slice at the bottom
+        nb_msgs_theor_in_cube = 2*(ceiling(NPROC_XI/2.d0)) + 1
+! case of an edge
+      else if(iproc_xi == 0 .or. iproc_xi == NPROC_XI-1 .or. &
+              iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1) then
+! slices on the "vertical" face plus one slice at the bottom
+        nb_msgs_theor_in_cube = ceiling(NPROC_XI/2.d0) + 1
+      else
+! bottom element only
+        nb_msgs_theor_in_cube = 1
+      endif
+    endif
+  else if(ichunk == CHUNK_AB_ANTIPODE) then
+    if(NPROC_XI == 1) then
+! five sides if only one processor in cube
+      nb_msgs_theor_in_cube = 5
+    else
+! case of a corner
+      if((iproc_xi == 0 .or. iproc_xi == NPROC_XI-1).and. &
+         (iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1)) then
+! slices on both "vertical" faces plus one slice at the bottom
+        nb_msgs_theor_in_cube = 2*(floor(NPROC_XI/2.d0)) + 1
+! case of an edge
+      else if(iproc_xi == 0 .or. iproc_xi == NPROC_XI-1 .or. &
+              iproc_eta == 0 .or. iproc_eta == NPROC_ETA-1) then
+! slices on the "vertical" face plus one slice at the bottom
+        nb_msgs_theor_in_cube = floor(NPROC_XI/2.d0) + 1
+      else
+! bottom element only
+        nb_msgs_theor_in_cube = 1
+      endif
+    endif
+  else
+! not in chunk AB
+    nb_msgs_theor_in_cube = 0
+  endif
+
+! number of points to send or receive (bottom of slices)
+  npoin2D_cube_from_slices = NSPEC2D_BOTTOM_INNER_CORE * NGLLX * NGLLY
+
+  end subroutine comp_central_cube_buffer_size
+

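[Editor's note] The indirect addressing above matches each point received from the slices against candidate GLL points on the cube faces with a Euclidean-distance test below the tolerance SMALLVALTOL from constants.h, exiting via "goto 100" once a match is found. The short sketch below is not part of the patch; it shows the same tolerance test written with a logical flag and an exit statement, with a placeholder tolerance and a toy list of candidate points.

    ! editor's sketch of the tolerance-based point matching, under the assumptions stated above
    program demo_point_match
      implicit none
      integer, parameter :: n = 5
      double precision, parameter :: TOL = 1.d-10   ! placeholder for SMALLVALTOL
      double precision :: xc(n), yc(n), zc(n)
      double precision :: x_target, y_target, z_target
      integer :: i, imatch
      logical :: found
      ! toy candidate points along a line; the target coincides with the third one
      do i = 1,n
        xc(i) = dble(i); yc(i) = 0.d0; zc(i) = 0.d0
      enddo
      x_target = 3.d0; y_target = 0.d0; z_target = 0.d0
      found = .false.; imatch = 0
      do i = 1,n
        if (dsqrt((xc(i)-x_target)**2 + (yc(i)-y_target)**2 + (zc(i)-z_target)**2) < TOL) then
          imatch = i; found = .true.; exit
        endif
      enddo
      if (.not. found) stop 'point never found'
      print *, 'matching point index = ', imatch
    end program demo_point_match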
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/fix_non_blocking_flags.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,178 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+! fix the non-blocking arrays used to assemble the slices inside each chunk: elements
+! in contact with the MPI faces only through an edge or a corner, but not through
+! a full face, are missing, therefore let us add them
+  subroutine fix_non_blocking_slices(is_on_a_slice_edge,iboolright_xi, &
+         iboolleft_xi,iboolright_eta,iboolleft_eta, &
+         npoin2D_xi,npoin2D_eta,ibool, &
+         mask_ibool,nspec,nglob,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob,NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
+
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi,npoin2D_eta
+
+  logical, dimension(nspec) :: is_on_a_slice_edge
+
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX) :: iboolleft_xi,iboolright_xi
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX) :: iboolleft_eta,iboolright_eta
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+  logical, dimension(nglob) :: mask_ibool
+
+  integer :: ipoin,ispec,i,j,k
+
+! clean the mask
+  mask_ibool(:) = .false.
+
+! mark all the points that are in the MPI buffers to assemble inside each chunk
+  do ipoin = 1,npoin2D_xi(1)
+    mask_ibool(iboolleft_xi(ipoin)) = .true.
+  enddo
+
+  do ipoin = 1,npoin2D_eta(1)
+    mask_ibool(iboolleft_eta(ipoin)) = .true.
+  enddo
+
+  do ipoin = 1,npoin2D_xi(2)
+    mask_ibool(iboolright_xi(ipoin)) = .true.
+  enddo
+
+  do ipoin = 1,npoin2D_eta(2)
+    mask_ibool(iboolright_eta(ipoin)) = .true.
+  enddo
+
+! now label all the elements that have at least one corner belonging
+! to any of these buffers as elements that must contribute to the
+! first step of the calculations (performed on the edges before starting
+! the non blocking communications); there is no need to examine the inside
+! of the elements, checking their eight corners is sufficient
+  do ispec = 1,nspec
+    do k = 1,NGLLZ,NGLLZ-1
+      do j  = 1,NGLLY,NGLLY-1
+        do i = 1,NGLLX,NGLLX-1
+          if(mask_ibool(ibool(i,j,k,ispec))) then
+            is_on_a_slice_edge(ispec) = .true.
+            goto 888
+          endif
+        enddo
+      enddo
+    enddo
+  888 continue
+  enddo
+
+  end subroutine fix_non_blocking_slices
+
+!-------------------------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------------------------
+!-------------------------------------------------------------------------------------------------
+
+! fix the non-blocking arrays used to assemble the central cube: elements
+! in contact with the MPI faces only through an edge or a corner, but not through
+! a full face, are missing, therefore let us add them
+  subroutine fix_non_blocking_central_cube(is_on_a_slice_edge, &
+         ibool,nspec,nglob,nb_msgs_theor_in_cube,ibelm_bottom_inner_core, &
+         idoubling_inner_core,npoin2D_cube_from_slices,ibool_central_cube,NSPEC2D_BOTTOM_INNER_CORE,ichunk)
+
+  implicit none
+
+  include "constants.h"
+
+  integer :: nspec,nglob,nb_msgs_theor_in_cube,NSPEC2D_BOTTOM_INNER_CORE,ichunk,npoin2D_cube_from_slices
+
+  logical, dimension(nspec) :: is_on_a_slice_edge
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+
+  integer, dimension(nb_msgs_theor_in_cube,npoin2D_cube_from_slices) :: ibool_central_cube
+
+  integer, dimension(NSPEC2D_BOTTOM_INNER_CORE) :: ibelm_bottom_inner_core
+
+! local to global mapping
+  integer, dimension(nspec) :: idoubling_inner_core
+
+! this mask is declared as integer in the calling program because it is used elsewhere
+! to store integers, and it is reused here as a logical to save memory
+  logical, dimension(nglob) :: mask_ibool
+
+  integer :: ipoin,ispec,i,j,k,imsg,ispec2D
+
+  if(ichunk /= CHUNK_AB .and. ichunk /= CHUNK_AB_ANTIPODE) then
+    do ispec2D = 1,NSPEC2D_BOTTOM_INNER_CORE
+      ispec = ibelm_bottom_inner_core(ispec2D)
+      is_on_a_slice_edge(ispec) = .true.
+    enddo
+  endif
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+    do ispec = 1,nspec
+      if(idoubling_inner_core(ispec) == IFLAG_BOTTOM_CENTRAL_CUBE .or. &
+         idoubling_inner_core(ispec) == IFLAG_TOP_CENTRAL_CUBE) &
+        is_on_a_slice_edge(ispec) = .true.
+    enddo
+  endif
+
+  if(ichunk == CHUNK_AB .or. ichunk == CHUNK_AB_ANTIPODE) then
+
+! clean the mask
+  mask_ibool(:) = .false.
+
+    do imsg = 1,nb_msgs_theor_in_cube
+      do ipoin = 1,npoin2D_cube_from_slices
+        mask_ibool(ibool_central_cube(imsg,ipoin)) = .true.
+      enddo
+    enddo
+
+! now label all the elements that have at least one corner belonging
+! to any of these buffers as elements that must contribute to the
+! first step of the calculations (performed on the edges before starting
+! the non blocking communications); there is no need to examine the inside
+! of the elements, checking their eight corners is sufficient
+  do ispec = 1,nspec
+    do k = 1,NGLLZ,NGLLZ-1
+      do j  = 1,NGLLY,NGLLY-1
+        do i = 1,NGLLX,NGLLX-1
+          if(mask_ibool(ibool(i,j,k,ispec))) then
+            is_on_a_slice_edge(ispec) = .true.
+            goto 888
+          endif
+        enddo
+      enddo
+    enddo
+  888 continue
+  enddo
+
+  endif
+
+  end subroutine fix_non_blocking_central_cube
+

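[Editor's note] Both routines above flag an element as soon as one of its eight corners lies in an MPI buffer, using strided loops of the form "do k = 1,NGLLZ,NGLLZ-1", which visit only the first and last GLL index in each direction. The sketch below is illustrative only and not part of the patch; it assumes NGLLX = NGLLY = NGLLZ = 5 for the example, whereas the real values come from constants.h.

    ! editor's sketch showing that the strided triple loop visits exactly the 8 element corners
    program demo_corner_stride
      implicit none
      integer, parameter :: NGLLX = 5, NGLLY = 5, NGLLZ = 5
      integer :: i, j, k, ncorners
      ncorners = 0
      ! a stride of NGLL-1 starting at 1 visits only indices 1 and NGLL in each direction
      do k = 1,NGLLZ,NGLLZ-1
        do j = 1,NGLLY,NGLLY-1
          do i = 1,NGLLX,NGLLX-1
            ncorners = ncorners + 1
          enddo
        enddo
      enddo
      print *, 'corners visited = ', ncorners   ! prints 8
    end program demo_corner_stride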
Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_regular_points.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_regular_points.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/locate_regular_points.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,491 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+subroutine read_kl_regular_grid(myrank, GRID)
+
+  implicit none
+  include 'constants.h'
+
+  type kl_reg_grid_variables
+    sequence
+    real dlat
+    real dlon
+    integer nlayer
+    real rlayer(NM_KL_REG_LAYER)
+    integer ndoubling(NM_KL_REG_LAYER)
+    integer nlat(NM_KL_REG_LAYER)
+    integer nlon(NM_KL_REG_LAYER)
+    integer npts_total
+    integer npts_before_layer(NM_KL_REG_LAYER+1)
+  end type kl_reg_grid_variables
+
+  type (kl_reg_grid_variables), intent(inout) :: GRID
+
+  integer :: myrank,ios,nlayer,i,nlat,nlon,npts_this_layer
+
+  ! possible improvement: read in by the master process and broadcast to all the slaves
+  open(10,file=PATHNAME_KL_REG,iostat=ios,status='old',action='read')
+
+  read(10,*) GRID%dlat, GRID%dlon
+
+  nlayer = 1
+  do while (nlayer <= NM_KL_REG_LAYER)
+    read(10,*,iostat=ios) GRID%rlayer(nlayer), GRID%ndoubling(nlayer)
+    if (ios/=0) exit
+    nlayer = nlayer + 1
+  enddo
+  close(10)
+
+  if (nlayer > NM_KL_REG_LAYER) then
+    call exit_MPI(myrank, 'Increase NM_KL_REG_LAYER limit')
+  endif
+
+  GRID%nlayer = nlayer
+
+  GRID%npts_total = 0
+  GRID%npts_before_layer = 0
+  do i = 1, nlayer
+    nlon = floor((KL_REG_MAX_LON-KL_REG_MIN_LON)/(GRID%dlon*GRID%ndoubling(i)))+1
+    GRID%nlon(i) = nlon
+    nlat = floor((KL_REG_MAX_LAT-KL_REG_MIN_LAT)/(GRID%dlat*GRID%ndoubling(i)))+1
+    GRID%nlat(i) = nlat
+    npts_this_layer = nlon * nlat
+    GRID%npts_total = GRID%npts_total + npts_this_layer
+    GRID%npts_before_layer(i+1) = GRID%npts_before_layer(i) + npts_this_layer
+  enddo
+  if (GRID%npts_total <= 0) then
+    call exit_MPI(myrank, 'No Model points read in')
+  endif
+
+end subroutine read_kl_regular_grid
+
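
For illustration (not part of this revision), a minimal standalone sketch of the layer bookkeeping performed above; the KL_REG_* bounds and the two-layer grid are made-up stand-ins for the values that normally come from constants.h and the PATHNAME_KL_REG file:

  program demo_kl_grid_counts
    implicit none
    ! made-up stand-ins for the constants.h values (assumptions, not the real ones)
    real, parameter :: KL_REG_MIN_LON = -180., KL_REG_MAX_LON = 180.
    real, parameter :: KL_REG_MIN_LAT = -90.,  KL_REG_MAX_LAT = 90.
    real, parameter :: dlat = 2.0, dlon = 2.0
    integer, parameter :: nlayer = 2
    integer :: ndoubling(nlayer) = (/ 1, 2 /)   ! second layer twice as coarse
    integer :: npts_before_layer(nlayer+1), nlat, nlon, npts_total, i

    npts_total = 0
    npts_before_layer = 0
    do i = 1, nlayer
      nlon = floor((KL_REG_MAX_LON - KL_REG_MIN_LON) / (dlon * ndoubling(i))) + 1
      nlat = floor((KL_REG_MAX_LAT - KL_REG_MIN_LAT) / (dlat * ndoubling(i))) + 1
      npts_total = npts_total + nlon * nlat
      npts_before_layer(i+1) = npts_before_layer(i) + nlon * nlat
    enddo
    ! npts_before_layer(i)+1 is then the index of the first point of layer i
    print *, 'npts_total = ', npts_total
    print *, 'npts_before_layer = ', npts_before_layer
  end program demo_kl_grid_counts
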
+!==============================================================
+
+subroutine find_regular_grid_slice_number(slice_number, GRID, &
+                                          NCHUNKS, NPROC_XI, NPROC_ETA)
+
+  implicit none
+  include 'constants.h'
+
+  integer, intent(out) :: slice_number(*)
+
+  type kl_reg_grid_variables
+    sequence
+    real dlat
+    real dlon
+    integer nlayer
+    real rlayer(NM_KL_REG_LAYER)
+    integer ndoubling(NM_KL_REG_LAYER)
+    integer nlat(NM_KL_REG_LAYER)
+    integer nlon(NM_KL_REG_LAYER)
+    integer npts_total
+    integer npts_before_layer(NM_KL_REG_LAYER+1)
+  end type kl_reg_grid_variables
+  type (kl_reg_grid_variables), intent(in) :: GRID
+
+  integer, intent(in) :: NCHUNKS,NPROC_XI,NPROC_ETA
+
+  real(kind=CUSTOM_REAL) :: xi_width, eta_width
+  integer :: nproc, ilayer, isp, ilat, ilon, k, chunk_isp
+  integer :: iproc_xi, iproc_eta
+  real :: lat,lon,th,ph,x,y,z,xik,etak,xi_isp,eta_isp,xi1,eta1
+
+  ! assumes a full 6-chunk global simulation for now
+  if (NCHUNKS /= 6 .or. NPROC_XI /= NPROC_ETA) then
+    call exit_MPI(0, 'only 6-chunk simulations with NPROC_XI == NPROC_ETA are supported at the moment')
+  endif
+
+  xi_width=PI/2; eta_width=PI/2; nproc=NPROC_XI
+  ilayer=0
+
+  do isp = 1,GRID%npts_total
+    if (isp == GRID%npts_before_layer(ilayer+1)+1) ilayer=ilayer+1
+    ilat = (isp - GRID%npts_before_layer(ilayer) - 1) / GRID%nlat(ilayer)
+    ilon = (isp - GRID%npts_before_layer(ilayer) - 1) - ilat * GRID%nlat(ilayer)
+
+    ! (lat,lon,radius) for isp point
+    lat = KL_REG_MIN_LAT + ilat * GRID%dlat * GRID%ndoubling(ilayer)
+    th = (90 - lat) * DEGREES_TO_RADIANS
+    lon = KL_REG_MIN_LON + ilon * GRID%dlon * GRID%ndoubling(ilayer)
+    ph = lon * DEGREES_TO_RADIANS
+    x = sin(th) * cos(ph); y = sin(th) * sin(ph); z = cos(th)
+
+    ! figure out slice number
+    chunk_isp = 1; xi_isp = 0; eta_isp = 0
+    do k = 1, NCHUNKS
+      call chunk_map(k, x, y, z, xik, etak)
+      if (abs(xik) <= PI/4 .and. abs(etak) <= PI/4) then
+        chunk_isp = k;  xi_isp = xik; eta_isp = etak; exit
+      endif
+    enddo
+    xi1 = xi_isp / xi_width * 2; eta1 = eta_isp / eta_width * 2
+    iproc_xi = floor((xi1+1)/2 * nproc)
+    iproc_eta = floor((eta1+1)/2 * nproc)
+    slice_number(isp) = nproc * nproc * (chunk_isp-1) + nproc * iproc_eta + iproc_xi
+  enddo
+
+end subroutine find_regular_grid_slice_number
+
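
For illustration (not part of this revision), a minimal sketch of the slice numbering used above, under the same assumption of 6 chunks with nproc = NPROC_XI = NPROC_ETA slices along each chunk side; the chunk index and the (xi, eta) values below are made up:

  program demo_slice_number
    implicit none
    real, parameter :: PI = 3.141592653589793
    real, parameter :: xi_width = PI/2., eta_width = PI/2.
    real :: xi = 0.3, eta = -0.2          ! made-up chunk coordinates, in [-PI/4, PI/4]
    integer :: nproc = 4, chunk_isp = 2   ! made up: 4 x 4 slices per chunk, second chunk
    integer :: iproc_xi, iproc_eta, islice
    real :: xi1, eta1

    ! normalize to [-1,1] across the chunk, then map to a processor index in [0, nproc-1]
    xi1  = xi  / xi_width  * 2.
    eta1 = eta / eta_width * 2.
    iproc_xi  = floor((xi1  + 1.) / 2. * nproc)
    iproc_eta = floor((eta1 + 1.) / 2. * nproc)

    ! same numbering as slice_number(isp) above
    islice = nproc * nproc * (chunk_isp - 1) + nproc * iproc_eta + iproc_xi
    print *, 'iproc_xi, iproc_eta, slice = ', iproc_xi, iproc_eta, islice
  end program demo_slice_number
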
+!==============================================================
+
+! how about using single precision for the iterations?
+subroutine locate_reg_points(npoints_slice,points_slice,GRID, &
+                             NEX_XI,nspec,xstore,ystore,zstore,ibool, &
+                             xigll,yigll,zigll,ispec_reg, &
+                             hxir_reg,hetar_reg,hgammar_reg)
+
+  implicit none
+  include 'constants.h'
+
+  ! declarations of regular grid model
+  integer, intent(in) :: npoints_slice
+  integer, dimension(NM_KL_REG_PTS), intent(in) :: points_slice
+
+  type kl_reg_grid_variables
+    sequence
+    real dlat
+    real dlon
+    integer nlayer
+    real rlayer(NM_KL_REG_LAYER)
+    integer ndoubling(NM_KL_REG_LAYER)
+    integer nlat(NM_KL_REG_LAYER)
+    integer nlon(NM_KL_REG_LAYER)
+    integer npts_total
+    integer npts_before_layer(NM_KL_REG_LAYER+1)
+  end type kl_reg_grid_variables
+  type (kl_reg_grid_variables), intent(in) :: GRID
+
+  ! simulation geometry
+  integer, intent(in) :: NEX_XI, nspec
+  real(kind=CUSTOM_REAL), dimension(*), intent(in) :: xstore,ystore,zstore
+  integer, dimension(NGLLX,NGLLY,NGLLZ,*), intent(in) :: ibool
+
+  ! Gauss-Lobatto-Legendre points of integration and weights
+  double precision, dimension(NGLLX), intent(in) :: xigll
+  double precision, dimension(NGLLY), intent(in) :: yigll
+  double precision, dimension(NGLLZ), intent(in) :: zigll
+
+  ! output
+  integer, dimension(NM_KL_REG_PTS), intent(out) :: ispec_reg
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NM_KL_REG_PTS), intent(out) :: hxir_reg
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NM_KL_REG_PTS), intent(out) :: hetar_reg
+  real(kind=CUSTOM_REAL), dimension(NGLLZ,NM_KL_REG_PTS), intent(out) :: hgammar_reg
+
+  ! GLL indices of the anchor nodes
+  integer, dimension(NGNOD) :: iaddx, iaddy, iaddr
+
+  integer :: i, j, k, isp, ilayer, ilat, ilon, iglob, ix_in, iy_in, iz_in
+  integer :: ispec_in, ispec, iter_loop, ia, ipoint
+  double precision :: lat, lon, radius, th, ph, x,y,z
+  double precision :: x_target, y_target, z_target
+  double precision :: distmin,dist,typical_size
+  double precision :: xi,eta,gamma,dx,dy,dz,dxi,deta,dgamma
+  double precision :: xix,xiy,xiz
+  double precision :: etax,etay,etaz
+  double precision :: gammax,gammay,gammaz
+
+  logical locate_target
+  double precision, dimension(NGNOD) :: xelm, yelm, zelm
+
+  double precision, dimension(NGLLX) :: hxir
+  double precision, dimension(NGLLY) :: hetar
+  double precision, dimension(NGLLZ) :: hgammar
+
+  ! DEBUG
+  !real(kind=CUSTOM_REAL), dimension(npoints_slice) :: dist_final
+
+  !---------------------------
+
+  call hex_nodes2(iaddx,iaddy,iaddr)
+
+  ! compute typical size of elements at the surface
+  typical_size = TWO_PI * R_UNIT_SPHERE / (4.*NEX_XI)
+
+  ! use 10 times that size as the search radius for excluding far-away elements
+  typical_size = 10. * typical_size
+
+  ! DEBUG
+  !dist_final=HUGEVAL
+
+  do ipoint = 1, npoints_slice
+    isp = points_slice(ipoint)
+    do ilayer = 1, GRID%nlayer
+      if (isp <= GRID%npts_before_layer(ilayer+1)) exit
+    enddo
+
+    ilat = (isp - GRID%npts_before_layer(ilayer) - 1) / GRID%nlat(ilayer)
+    ilon = (isp - GRID%npts_before_layer(ilayer) - 1) - ilat * GRID%nlat(ilayer)
+
+    ! (lat,lon,radius) for isp point
+    lat = KL_REG_MIN_LAT + ilat * GRID%dlat * GRID%ndoubling(ilayer)
+    lon = KL_REG_MIN_LON + ilon * GRID%dlon * GRID%ndoubling(ilayer)
+    ! convert radius to meters and then scale
+    radius = GRID%rlayer(ilayer) * 1000.0 / R_EARTH
+    ! (x,y,z) for isp point
+    th = (90 - lat) * DEGREES_TO_RADIANS; ph = lon * DEGREES_TO_RADIANS
+    x_target = radius * sin(th) * cos(ph)
+    y_target = radius * sin(th) * sin(ph)
+    z_target = radius * cos(th)
+
+    ! first exclude elements too far away
+    locate_target = .false.;  distmin = HUGEVAL
+    do ispec = 1,nspec
+      iglob = ibool(1,1,1,ispec)
+      dist = dsqrt((x_target - xstore(iglob))**2 &
+                 + (y_target - ystore(iglob))**2 &
+                 + (z_target - zstore(iglob))**2)
+      if (dist > typical_size) cycle
+
+      locate_target = .true.
+      ! loop only over the interior GLL points of the element;
+      ! edges are excluded so that the closest point found is not
+      ! shared with neighbouring elements.
+      ! this could be improved with a better test for whether a point
+      ! lies inside a given spectral element
+
+      do k = 2, NGLLZ-1
+        do j = 2, NGLLY-1
+          do i = 2, NGLLX-1
+            iglob = ibool(i,j,k,ispec)
+            dist = dsqrt((x_target - xstore(iglob))**2 &
+                        +(y_target - ystore(iglob))**2 &
+                        +(z_target - zstore(iglob))**2)
+            if (dist < distmin) then
+              ix_in=i; iy_in=j; iz_in=k; ispec_in=ispec; distmin=dist
+            endif
+          enddo
+        enddo
+      enddo
+
+    enddo
+    if (.not. locate_target) stop 'error: regular grid point could not be located in this slice'
+
+    xi = xigll(ix_in)
+    eta = yigll(iy_in)
+    gamma = zigll(iz_in)
+    ispec_reg(ipoint) = ispec_in
+
+    ! anchors
+    do ia = 1, NGNOD
+      iglob = ibool(iaddx(ia), iaddy(ia), iaddr(ia), ispec_in)
+      xelm(ia) = dble(xstore(iglob))
+      yelm(ia) = dble(ystore(iglob))
+      zelm(ia) = dble(zstore(iglob))
+    enddo
+
+    ! iterate to solve the nonlinear system
+    do iter_loop = 1,NUM_ITER
+
+      ! recompute jacobian for the new point
+      call recompute_jacobian(xelm,yelm,zelm, xi,eta,gamma, x,y,z, &
+                              xix,xiy,xiz, etax,etay,etaz, gammax,gammay,gammaz)
+
+      ! compute distance to target location
+      dx = - (x - x_target)
+      dy = - (y - y_target)
+      dz = - (z - z_target)
+
+      ! compute increments
+      dxi  = xix*dx + xiy*dy + xiz*dz
+      deta = etax*dx + etay*dy + etaz*dz
+      dgamma = gammax*dx + gammay*dy + gammaz*dz
+
+      ! update values
+      xi = xi + dxi
+      eta = eta + deta
+      gamma = gamma + dgamma
+
+      ! Debugging
+      !if (abs(xi) > 1.d0+TINYVAL .or. abs(eta) > 1.d0+TINYVAL &
+      !     .or. abs(gamma) > 1.0d0+TINYVAL) then
+      !   print *, 'Outside the element ', myrank, ipoint,' : ', &
+      !        iter_loop,xi,eta,gamma
+      !endif
+
+      ! impose that we stay in that element
+      ! (useful if the target grid point falls slightly outside the mesh, for instance)
+      if (xi > 1.d0) xi = 1.d0
+      if (xi < -1.d0) xi = -1.d0
+      if (eta > 1.d0) eta = 1.d0
+      if (eta < -1.d0) eta = -1.d0
+      if (gamma > 1.d0) gamma = 1.d0
+      if (gamma < -1.d0) gamma = -1.d0
+
+    enddo
+
+    ! DEBUG: recompute jacobian for the new point (can be commented after debug)
+    !call recompute_jacobian(xelm,yelm,zelm,xi,eta,gamma,x,y,z,xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz)
+    !dist_final(ipoint)=dsqrt((x_target-x)**2+(y_target-y)**2+(z_target-z)**2)
+
+    ! store l(xi),l(eta),l(gamma)
+    call lagrange_any2(xi, NGLLX, xigll, hxir)
+    call lagrange_any2(eta, NGLLY, yigll, hetar)
+    call lagrange_any2(gamma, NGLLZ, zigll, hgammar)
+    hxir_reg(:,ipoint) = hxir
+    hetar_reg(:,ipoint) = hetar
+    hgammar_reg(:,ipoint) = hgammar
+
+  enddo ! ipoint
+
+! DEBUG
+!  print *, 'Maximum distance discrepancy ', maxval(dist_final(1:npoints_slice))
+
+end subroutine locate_reg_points
+
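
The NUM_ITER loop above is a Newton-type iteration: the misfit to the target location is multiplied by the inverse Jacobian entries (xix, etax, ...) to update (xi, eta, gamma), which is then clamped to the reference element. For illustration (not part of this revision), a one-dimensional sketch of the same update, with a made-up mapping x(xi) = 2 + sinh(xi) standing in for recompute_jacobian:

  program demo_newton_map_1d
    implicit none
    ! one-dimensional analogue of the NUM_ITER loop above; xix = dxi/dx = 1/cosh(xi)
    ! plays the role of the inverse Jacobian returned by recompute_jacobian
    integer, parameter :: NUM_ITER = 6
    double precision :: xi, x, xix, dx, x_target
    integer :: iter

    x_target = 2.75d0
    xi = 0.d0                         ! start from the centre of the "element"
    do iter = 1, NUM_ITER
      x   = 2.d0 + sinh(xi)           ! forward mapping
      xix = 1.d0 / cosh(xi)           ! inverse Jacobian
      dx  = -(x - x_target)           ! misfit to the target
      xi  = xi + xix * dx             ! update
      ! clamp to the reference element, as in the loop above
      if (xi >  1.d0) xi =  1.d0
      if (xi < -1.d0) xi = -1.d0
    enddo
    print *, 'xi = ', xi, ' x(xi) = ', 2.d0 + sinh(xi)   ! converges to asinh(0.75)
  end program demo_newton_map_1d
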
+!==============================================================
+
+subroutine hex_nodes2(iaddx,iaddy,iaddz)
+
+  implicit none
+  include 'constants.h'
+
+  integer, dimension(NGNOD), intent(out) :: iaddx,iaddy,iaddz
+  integer :: ia
+
+  ! define topology of the control element
+  call hex_nodes(iaddx,iaddy,iaddz)
+
+  ! convert the topological indices into the GLL indices of the control (anchor) points
+  do ia=1,NGNOD
+
+     if (iaddx(ia) == 0) then
+        iaddx(ia) = 1
+     else if (iaddx(ia) == 1) then
+        iaddx(ia) = (NGLLX+1)/2
+     else if (iaddx(ia) == 2) then
+        iaddx(ia) = NGLLX
+     else
+        stop 'incorrect value of iaddx'
+     endif
+
+     if (iaddy(ia) == 0) then
+        iaddy(ia) = 1
+     else if (iaddy(ia) == 1) then
+        iaddy(ia) = (NGLLY+1)/2
+     else if (iaddy(ia) == 2) then
+        iaddy(ia) = NGLLY
+     else
+        stop 'incorrect value of iaddy'
+     endif
+
+     if (iaddz(ia) == 0) then
+        iaddz(ia) = 1
+     else if (iaddz(ia) == 1) then
+        iaddz(ia) = (NGLLZ+1)/2
+     else if (iaddz(ia) == 2) then
+        iaddz(ia) = NGLLZ
+     else
+        stop 'incorrect value of iaddz'
+     endif
+
+  enddo
+
+end subroutine hex_nodes2
+
+!==============================================================
+
+subroutine lagrange_any2(xi,NGLL,xigll,h)
+
+! subroutine to compute the Lagrange interpolants based upon the GLL points
+! at any point xi in [-1,1] (values only; the first derivatives are not computed here)
+
+  implicit none
+
+  double precision, intent(in) :: xi
+  integer, intent(in) :: NGLL
+  double precision, dimension(NGLL), intent(in) :: xigll
+  double precision, dimension(NGLL), intent(out) :: h
+
+  integer :: dgr,i
+  double precision :: prod1,prod2
+
+  do dgr=1,NGLL
+     prod1 = 1.0d0
+     prod2 = 1.0d0
+
+     do i=1,NGLL
+        if (i /= dgr) then
+           prod1 = prod1 * (xi         - xigll(i))
+           prod2 = prod2 * (xigll(dgr) - xigll(i))
+        endif
+     enddo
+
+     h(dgr) = prod1 / prod2
+  enddo
+
+end subroutine lagrange_any2
+
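
For illustration (not part of this revision), a sketch of what lagrange_any2 returns, hard-coding the 3-point GLL set {-1, 0, 1}; the real solver uses the NGLLX/NGLLY/NGLLZ point sets from the mesher:

  program demo_lagrange_any2
    implicit none
    ! evaluate the same formula as lagrange_any2 above on a made-up 3-point case
    integer, parameter :: NGLL = 3
    double precision :: xigll(NGLL) = (/ -1.d0, 0.d0, 1.d0 /)
    double precision :: h(NGLL), xi, prod1, prod2
    integer :: dgr, i

    xi = 0.5d0
    do dgr = 1, NGLL
      prod1 = 1.d0
      prod2 = 1.d0
      do i = 1, NGLL
        if (i /= dgr) then
          prod1 = prod1 * (xi         - xigll(i))
          prod2 = prod2 * (xigll(dgr) - xigll(i))
        endif
      enddo
      h(dgr) = prod1 / prod2
    enddo
    ! h = (-0.125, 0.75, 0.375): the weights sum to 1 and reproduce xi**2 exactly
    print *, h, sum(h), sum(h * xigll**2)
  end program demo_lagrange_any2
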
+!==============================================================
+
+subroutine chunk_map(k,xx,yy,zz,xi,eta)
+
+  ! this subroutine gets the (xi,eta) coordinates of the point (xx,yy,zz)
+  ! under the k-th chunk coordinate transformation
+
+  implicit none
+  include 'constants.h'
+
+  integer, intent(in) :: k
+  real, intent(in) :: xx, yy, zz
+  real, intent(out) :: xi, eta
+
+  real :: x, y, z
+  real, parameter :: EPS=1e-6
+
+  x = xx; y = yy; z = zz
+  if (0 <= x .and. x < EPS)  x = EPS
+  if (-EPS < x .and. x < 0)  x = -EPS
+  if (0 <= y .and. y < EPS)  y = EPS
+  if (-EPS < y .and. y < 0)  y = -EPS
+  if (0 <= z .and. z < EPS)  z = EPS
+  if (-EPS < z .and. z < 0)  z = -EPS
+
+  if (k == CHUNK_AB) then
+     xi = atan(y/z); eta = atan(-x/z)
+     if (z < 0)  xi = 10
+  else if (k == CHUNK_AC) then
+     xi = atan(-z/y); eta = atan(x/y)
+     if (y > 0)  xi = 10
+  else if (k == CHUNK_BC) then
+     xi = atan(-z/x); eta = atan(-y/x)
+     if (x > 0)  xi = 10
+  else if (k == CHUNK_AC_ANTIPODE) then
+     xi = atan(-z/y); eta = atan(-x/y)
+     if (y < 0)  xi = 10
+  else if (k == CHUNK_BC_ANTIPODE) then
+     xi = atan(z/x); eta = atan(-y/x)
+     if (x < 0)  xi = 10
+  else if (k == CHUNK_AB_ANTIPODE) then
+     xi = atan(y/z); eta = atan(x/z)
+     if (z > 0)  xi = 10
+  else
+     stop 'invalid chunk number k, must be one of the six chunks'
+  endif
+
+end subroutine chunk_map
+
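
For illustration (not part of this revision), a round-trip sketch of the CHUNK_AB branch of chunk_map: a unit vector is built from made-up (xi, eta) through the gnomonic relations y/z = tan(xi) and -x/z = tan(eta), and the same atan formulas as above recover the angles:

  program demo_chunk_map_ab
    implicit none
    real :: xi_in = 0.2, eta_in = -0.3   ! made-up chunk coordinates
    real :: x, y, z, r, xi_out, eta_out

    z = 1.0
    y =  tan(xi_in)  * z
    x = -tan(eta_in) * z
    r = sqrt(x*x + y*y + z*z)
    x = x/r;  y = y/r;  z = z/r        ! point on the unit sphere with z > 0 (chunk AB)

    xi_out  = atan(y / z)
    eta_out = atan(-x / z)
    print *, 'recovered xi, eta = ', xi_out, eta_out   ! should print 0.2, -0.3
  end program demo_chunk_map_ab
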

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/read_arrays_buffers_solver.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,428 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine read_arrays_buffers_solver(iregion_code,myrank, &
+     iboolleft_xi,iboolright_xi,iboolleft_eta,iboolright_eta, &
+     npoin2D_xi,npoin2D_eta, &
+     iprocfrom_faces,iprocto_faces,imsg_type, &
+     iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+     iboolfaces,npoin2D_faces, &
+     iboolcorner, &
+     NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NGLOB1D_RADIAL, &
+     NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT,NPROC_XI,NPROC_ETA, &
+     LOCAL_PATH,NCHUNKS)
+
+  implicit none
+
+! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+
+  integer iregion_code,myrank,NCHUNKS
+
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi,npoin2D_eta
+
+  integer NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,NGLOB2DMAX_XY,NGLOB1D_RADIAL
+  integer NUMMSGS_FACES,NCORNERSCHUNKS,NPROCTOT,NPROC_XI,NPROC_ETA
+
+  integer npoin2D_faces(NUMFACES_SHARED)
+
+  character(len=150) LOCAL_PATH
+
+  integer, dimension(NGLOB2DMAX_XY,NUMFACES_SHARED) :: iboolfaces
+  integer, dimension(NGLOB1D_RADIAL,NUMCORNERS_SHARED) :: iboolcorner
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX) :: iboolleft_xi,iboolright_xi
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX) :: iboolleft_eta,iboolright_eta
+
+  integer, dimension(NUMMSGS_FACES) :: iprocfrom_faces,iprocto_faces,imsg_type
+
+! allocate array for messages for corners
+  integer, dimension(NCORNERSCHUNKS) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+  ! local parameters
+  integer ier
+  integer imsg
+
+
+! base pathname for output files
+  character(len=150) OUTPUT_FILES
+
+! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+
+! read 2-D addressing for summation between slices along xi with MPI
+  call read_arrays_buffers_1(iregion_code,myrank, &
+     iboolleft_xi,iboolright_xi,iboolleft_eta,iboolright_eta, &
+     npoin2D_xi,npoin2D_eta, &
+     NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,LOCAL_PATH)
+
+
+! read chunk messages only if more than one chunk
+  if(NCHUNKS /= 1) then
+
+    ! read messages to assemble between chunks with MPI
+    if(myrank == 0) then
+
+      ! get the base pathname for output files
+      call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', 'OUTPUT_FILES')
+
+      ! file with the list of processors for each message for faces
+      open(unit=IIN,file=trim(OUTPUT_FILES)//'/list_messages_faces.txt',status='old',action='read')
+      do imsg = 1,NUMMSGS_FACES
+      read(IIN,*) imsg_type(imsg),iprocfrom_faces(imsg),iprocto_faces(imsg)
+      if      (iprocfrom_faces(imsg) < 0 &
+          .or. iprocto_faces(imsg) < 0 &
+          .or. iprocfrom_faces(imsg) > NPROCTOT-1 &
+          .or. iprocto_faces(imsg) > NPROCTOT-1) &
+        call exit_MPI(myrank,'incorrect chunk faces numbering')
+      if (imsg_type(imsg) < 1 .or. imsg_type(imsg) > 3) &
+        call exit_MPI(myrank,'incorrect message type labeling')
+      enddo
+      close(IIN)
+
+
+      ! file with the list of processors for each message for corners
+      open(unit=IIN,file=trim(OUTPUT_FILES)//'/list_messages_corners.txt',status='old',action='read')
+      do imsg = 1,NCORNERSCHUNKS
+      read(IIN,*) iproc_master_corners(imsg),iproc_worker1_corners(imsg), &
+                              iproc_worker2_corners(imsg)
+      if    (iproc_master_corners(imsg) < 0 &
+        .or. iproc_worker1_corners(imsg) < 0 &
+        .or. iproc_worker2_corners(imsg) < 0 &
+        .or. iproc_master_corners(imsg) > NPROCTOT-1 &
+        .or. iproc_worker1_corners(imsg) > NPROCTOT-1 &
+        .or. iproc_worker2_corners(imsg) > NPROCTOT-1) &
+          call exit_MPI(myrank,'incorrect chunk corner numbering')
+      enddo
+      close(IIN)
+
+    endif
+
+    ! broadcast the information read on the master to the nodes
+    call MPI_BCAST(imsg_type,NUMMSGS_FACES,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+    call MPI_BCAST(iprocfrom_faces,NUMMSGS_FACES,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+    call MPI_BCAST(iprocto_faces,NUMMSGS_FACES,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+
+    call MPI_BCAST(iproc_master_corners,NCORNERSCHUNKS,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+    call MPI_BCAST(iproc_worker1_corners,NCORNERSCHUNKS,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+    call MPI_BCAST(iproc_worker2_corners,NCORNERSCHUNKS,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+
+    ! synchronizes processes
+    call sync_all()
+
+    call read_arrays_buffers_2(iregion_code,myrank, &
+     iprocfrom_faces,iprocto_faces, &
+     iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+     iboolfaces,npoin2D_faces,iboolcorner, &
+     NGLOB2DMAX_XY,NGLOB1D_RADIAL, &
+     NUMMSGS_FACES,NCORNERSCHUNKS,NPROC_XI,NPROC_ETA,LOCAL_PATH)
+
+
+  endif
+
+
+  end subroutine read_arrays_buffers_solver
+
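
For illustration (not part of this revision), a minimal MPI sketch of the read-on-master-then-broadcast pattern used above for the list_messages_*.txt contents; the 3-element list is made up and stands in for the imsg_type / iprocfrom_faces / iprocto_faces arrays:

  program demo_master_bcast
    implicit none
    include 'mpif.h'
    integer :: msg(3)
    integer :: myrank, ier

    call MPI_INIT(ier)
    call MPI_COMM_RANK(MPI_COMM_WORLD, myrank, ier)

    if (myrank == 0) msg = (/ 1, 2, 3 /)          ! only the master fills the data (as if read from file)
    call MPI_BCAST(msg, 3, MPI_INTEGER, 0, MPI_COMM_WORLD, ier)

    print *, 'rank ', myrank, ' has ', msg        ! every rank now holds the same list
    call MPI_FINALIZE(ier)
  end program demo_master_bcast
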
+!
+!------------------------------------------------------------------------------------------------------
+!
+
+
+  subroutine read_arrays_buffers_1(iregion_code,myrank, &
+     iboolleft_xi,iboolright_xi,iboolleft_eta,iboolright_eta, &
+     npoin2D_xi,npoin2D_eta, &
+     NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX,LOCAL_PATH)
+
+  implicit none
+
+! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+
+  integer iregion_code,myrank
+
+  integer, dimension(NB_SQUARE_EDGES_ONEDIR) :: npoin2D_xi,npoin2D_eta
+  integer NGLOB2DMAX_XMIN_XMAX,NGLOB2DMAX_YMIN_YMAX
+
+  character(len=150) LOCAL_PATH
+
+  integer, dimension(NGLOB2DMAX_XMIN_XMAX) :: iboolleft_xi,iboolright_xi
+  integer, dimension(NGLOB2DMAX_YMIN_YMAX) :: iboolleft_eta,iboolright_eta
+
+  ! local parameters
+  integer ier
+  integer npoin2D_xi_mesher,npoin2D_eta_mesher
+
+  double precision xdummy,ydummy,zdummy
+
+! processor identification
+  character(len=150) OUTPUT_FILES,prname
+
+! get the base pathname for output files
+  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', 'OUTPUT_FILES')
+
+! create the name for the database of the current slice and region
+  call create_name_database(prname,myrank,iregion_code,LOCAL_PATH)
+
+! read iboolleft_xi of this slice
+  open(unit=IIN,file=prname(1:len_trim(prname))//'iboolleft_xi.txt', &
+        status='old',action='read',iostat=ier)
+  if( ier /= 0 ) call exit_mpi(myrank,'error opening iboolleft_xi file')
+
+  npoin2D_xi(1) = 1
+ 350  continue
+  read(IIN,*) iboolleft_xi(npoin2D_xi(1)),xdummy,ydummy,zdummy
+  if(iboolleft_xi(npoin2D_xi(1)) > 0) then
+      npoin2D_xi(1) = npoin2D_xi(1) + 1
+      goto 350
+  endif
+! subtract the line that contains the flag after the last point
+  npoin2D_xi(1) = npoin2D_xi(1) - 1
+! read nb of points given by the mesher
+  read(IIN,*) npoin2D_xi_mesher
+  if(npoin2D_xi(1) > NGLOB2DMAX_XMIN_XMAX .or. npoin2D_xi(1) /= npoin2D_xi_mesher) &
+      call exit_MPI(myrank,'incorrect iboolleft_xi read')
+  close(IIN)
+
+  ! synchronizes processes
+  call sync_all()
+
+! read iboolright_xi of this slice
+  open(unit=IIN,file=prname(1:len_trim(prname))//'iboolright_xi.txt',status='old',action='read')
+  npoin2D_xi(2) = 1
+ 360  continue
+  read(IIN,*) iboolright_xi(npoin2D_xi(2)),xdummy,ydummy,zdummy
+  if(iboolright_xi(npoin2D_xi(2)) > 0) then
+      npoin2D_xi(2) = npoin2D_xi(2) + 1
+      goto 360
+  endif
+! subtract the line that contains the flag after the last point
+  npoin2D_xi(2) = npoin2D_xi(2) - 1
+! read nb of points given by the mesher
+  read(IIN,*) npoin2D_xi_mesher
+  if(npoin2D_xi(2) > NGLOB2DMAX_XMIN_XMAX .or. npoin2D_xi(2) /= npoin2D_xi_mesher) &
+      call exit_MPI(myrank,'incorrect iboolright_xi read')
+  close(IIN)
+
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) '# max of points in MPI buffers along xi npoin2D_xi = ', &
+                                maxval(npoin2D_xi(:))
+    write(IMAIN,*) '# max of array elements transferred npoin2D_xi*NDIM = ', &
+                                maxval(npoin2D_xi(:))*NDIM
+    write(IMAIN,*)
+  endif
+
+  ! synchronizes processes
+  call sync_all()
+
+
+! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+! read 2-D addressing for summation between slices along eta with MPI
+
+! read iboolleft_eta of this slice
+  open(unit=IIN,file=prname(1:len_trim(prname))//'iboolleft_eta.txt',status='old',action='read')
+  npoin2D_eta(1) = 1
+ 370  continue
+  read(IIN,*) iboolleft_eta(npoin2D_eta(1)),xdummy,ydummy,zdummy
+  if(iboolleft_eta(npoin2D_eta(1)) > 0) then
+      npoin2D_eta(1) = npoin2D_eta(1) + 1
+      goto 370
+  endif
+! subtract the line that contains the flag after the last point
+  npoin2D_eta(1) = npoin2D_eta(1) - 1
+! read nb of points given by the mesher
+  read(IIN,*) npoin2D_eta_mesher
+  if(npoin2D_eta(1) > NGLOB2DMAX_YMIN_YMAX .or. npoin2D_eta(1) /= npoin2D_eta_mesher) &
+      call exit_MPI(myrank,'incorrect iboolleft_eta read')
+  close(IIN)
+
+  ! synchronizes processes
+  call sync_all()
+
+! read iboolright_eta of this slice
+  open(unit=IIN,file=prname(1:len_trim(prname))//'iboolright_eta.txt',status='old',action='read')
+  npoin2D_eta(2) = 1
+ 380  continue
+  read(IIN,*) iboolright_eta(npoin2D_eta(2)),xdummy,ydummy,zdummy
+  if(iboolright_eta(npoin2D_eta(2)) > 0) then
+      npoin2D_eta(2) = npoin2D_eta(2) + 1
+      goto 380
+  endif
+! subtract the line that contains the flag after the last point
+  npoin2D_eta(2) = npoin2D_eta(2) - 1
+! read nb of points given by the mesher
+  read(IIN,*) npoin2D_eta_mesher
+  if(npoin2D_eta(2) > NGLOB2DMAX_YMIN_YMAX .or. npoin2D_eta(2) /= npoin2D_eta_mesher) &
+      call exit_MPI(myrank,'incorrect iboolright_eta read')
+  close(IIN)
+
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) '# max of points in MPI buffers along eta npoin2D_eta = ', &
+                                maxval(npoin2D_eta(:))
+    write(IMAIN,*) '# max of array elements transferred npoin2D_eta*NDIM = ', &
+                                maxval(npoin2D_eta(:))*NDIM
+    write(IMAIN,*)
+  endif
+
+  ! synchronizes processes
+  call sync_all()
+
+!! $$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$
+
+  end subroutine read_arrays_buffers_1
+
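
For illustration (not part of this revision), a sketch of the read-until-non-positive-flag pattern used for the iboolleft_xi / iboolright_xi / iboolleft_eta / iboolright_eta buffer files above, using an internal file with made-up records in place of the text files written by the mesher:

  program demo_sentinel_read
    implicit none
    ! made-up records: point lines are "ibool x y z", then a non-positive flag line, then the count
    character(len=32) :: rec(5)
    integer :: ibuf(10), n, nexpected, ival, i
    double precision :: xd, yd, zd

    rec(1) = ' 12  0.0 0.0 0.0'
    rec(2) = ' 57  0.0 0.0 0.0'
    rec(3) = '  8  0.0 0.0 0.0'
    rec(4) = '  0  0.0 0.0 0.0'   ! flag line after the last point
    rec(5) = '  3'                ! number of points according to the mesher

    n = 0
    do i = 1, 4
      read(rec(i),*) ival, xd, yd, zd
      if (ival <= 0) exit          ! same role as the flag test in the "goto 350" loop
      n = n + 1
      ibuf(n) = ival
    enddo
    read(rec(5),*) nexpected
    if (n /= nexpected) stop 'incorrect buffer read'
    print *, 'npoin2D = ', n, ' ibool = ', ibuf(1:n)
  end program demo_sentinel_read
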
+
+!
+!-----------------------------------------------------------------------------------------------
+!
+
+
+  subroutine read_arrays_buffers_2(iregion_code,myrank, &
+     iprocfrom_faces,iprocto_faces, &
+     iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners, &
+     iboolfaces,npoin2D_faces,iboolcorner, &
+     NGLOB2DMAX_XY,NGLOB1D_RADIAL, &
+     NUMMSGS_FACES,NCORNERSCHUNKS,NPROC_XI,NPROC_ETA,LOCAL_PATH)
+
+  implicit none
+
+! standard include of the MPI library
+  include 'mpif.h'
+
+  include "constants.h"
+
+  integer iregion_code,myrank
+
+  integer NGLOB2DMAX_XY,NGLOB1D_RADIAL
+  integer NUMMSGS_FACES,NCORNERSCHUNKS,NPROC_XI,NPROC_ETA
+
+  integer npoin2D_faces(NUMFACES_SHARED)
+
+  character(len=150) LOCAL_PATH
+
+  integer, dimension(NGLOB2DMAX_XY,NUMFACES_SHARED) :: iboolfaces
+  integer, dimension(NGLOB1D_RADIAL,NUMCORNERS_SHARED) :: iboolcorner
+
+  integer, dimension(NUMMSGS_FACES) :: iprocfrom_faces,iprocto_faces
+
+! allocate array for messages for corners
+  integer, dimension(NCORNERSCHUNKS) :: iproc_master_corners,iproc_worker1_corners,iproc_worker2_corners
+
+  ! local parameters
+  double precision xdummy,ydummy,zdummy
+  integer imsg
+  integer npoin1D_corner
+
+  integer icount_faces,icount_corners
+  integer ipoin1D,ipoin2D
+
+! processor identification
+  character(len=150) OUTPUT_FILES,prname,filename
+
+! get the base pathname for output files
+  call get_value_string(OUTPUT_FILES, 'OUTPUT_FILES', 'OUTPUT_FILES')
+
+! create the name for the database of the current slice and region
+  call create_name_database(prname,myrank,iregion_code,LOCAL_PATH)
+
+!---- read indirect addressing for each message for faces of the chunks
+!---- a given slice can belong to at most two faces
+  icount_faces = 0
+  do imsg = 1,NUMMSGS_FACES
+    if(myrank == iprocfrom_faces(imsg) .or. myrank == iprocto_faces(imsg)) then
+      icount_faces = icount_faces + 1
+      if(icount_faces>NUMFACES_SHARED) &
+        call exit_MPI(myrank,'more than NUMFACES_SHARED faces for this slice')
+      if(icount_faces>2 .and. (NPROC_XI > 1 .or. NPROC_ETA > 1)) &
+        call exit_MPI(myrank,'more than two faces for this slice')
+
+      ! read file with 2D buffer for faces
+      if(myrank == iprocfrom_faces(imsg)) then
+        write(filename,"('buffer_faces_chunks_sender_msg',i6.6,'.txt')") imsg
+      else if(myrank == iprocto_faces(imsg)) then
+        write(filename,"('buffer_faces_chunks_receiver_msg',i6.6,'.txt')") imsg
+      endif
+
+      open(unit=IIN,file=prname(1:len_trim(prname))//filename,status='old',action='read')
+      read(IIN,*) npoin2D_faces(icount_faces)
+      if(npoin2D_faces(icount_faces) > NGLOB2DMAX_XY) &
+        call exit_MPI(myrank,'incorrect nb of points in face buffer')
+      do ipoin2D = 1,npoin2D_faces(icount_faces)
+        read(IIN,*) iboolfaces(ipoin2D,icount_faces),xdummy,ydummy,zdummy
+
+        ! checks read value
+        if( iboolfaces(ipoin2D,icount_faces) < 1) then
+          print*,'error rank',myrank,': iboolfaces index =',iboolfaces(ipoin2D,icount_faces),xdummy,ydummy,zdummy
+          print*,'  message:',imsg,NUMMSGS_FACES,ipoin2D,icount_faces,'region',iregion_code
+          call exit_MPI(myrank,'error reading in iboolfaces index')
+        endif
+
+      enddo
+      close(IIN)
+    endif
+  enddo
+
+!---- read indirect addressing for each message for corners of the chunks
+!---- a given slice can belong to at most one corner
+  icount_corners = 0
+  do imsg = 1,NCORNERSCHUNKS
+  if(myrank == iproc_master_corners(imsg) .or. &
+       myrank == iproc_worker1_corners(imsg) .or. &
+       myrank == iproc_worker2_corners(imsg)) then
+    icount_corners = icount_corners + 1
+    if(icount_corners>1 .and. (NPROC_XI > 1 .or. NPROC_ETA > 1)) &
+      call exit_MPI(myrank,'more than one corner for this slice')
+    if(icount_corners>4) call exit_MPI(myrank,'more than four corners for this slice')
+
+! read file with 1D buffer for corner
+    if(myrank == iproc_master_corners(imsg)) then
+      write(filename,"('buffer_corners_chunks_master_msg',i6.6,'.txt')") imsg
+    else if(myrank == iproc_worker1_corners(imsg)) then
+      write(filename,"('buffer_corners_chunks_worker1_msg',i6.6,'.txt')") imsg
+    else if(myrank == iproc_worker2_corners(imsg)) then
+      write(filename,"('buffer_corners_chunks_worker2_msg',i6.6,'.txt')") imsg
+    endif
+
+! matching codes
+    open(unit=IIN,file=prname(1:len_trim(prname))//filename,status='old',action='read')
+    read(IIN,*) npoin1D_corner
+    if(npoin1D_corner /= NGLOB1D_RADIAL) &
+      call exit_MPI(myrank,'incorrect nb of points in corner buffer')
+    do ipoin1D = 1,npoin1D_corner
+      read(IIN,*) iboolcorner(ipoin1D,icount_corners),xdummy,ydummy,zdummy
+    enddo
+    close(IIN)
+  endif
+  enddo
+
+  end subroutine read_arrays_buffers_2
+

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/rules.mk
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/rules.mk	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/rules.mk	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,335 @@
+#=====================================================================
+#
+#          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+#          --------------------------------------------------
+#
+#          Main authors: Dimitri Komatitsch and Jeroen Tromp
+#                        Princeton University, USA
+#             and University of Pau / CNRS / INRIA, France
+# (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+#                            April 2011
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+#
+#=====================================================================
+
+#######################################
+
+specfem3D_TARGETS = \
+	$E/xspecfem3D \
+	$(EMPTY_MACRO)
+
+specfem3D_OBJECTS = \
+	$O/assemble_MPI_central_cube.o \
+	$O/assemble_MPI_scalar.o \
+	$O/assemble_MPI_vector.o \
+	$O/assemble_MPI_central_cube_block.o \
+	$O/assemble_MPI_scalar_block.o \
+	$O/assemble_MPI_vector_block.o \
+	$O/comp_source_spectrum.o \
+	$O/comp_source_time_function.o \
+	$O/compute_adj_source_frechet.o \
+	$O/compute_arrays_source.o \
+	$O/convert_time.o \
+	$O/create_central_cube_buffers.o \
+	$O/define_derivation_matrices.o \
+	$O/get_attenuation.o \
+	$O/get_backazimuth.o \
+	$O/get_cmt.o \
+	$O/get_event_info.o \
+	$O/locate_receivers.o \
+	$O/locate_regular_points.o \
+	$O/locate_sources.o \
+	$O/multiply_arrays_source.o \
+	$O/netlib_specfun_erf.o \
+	$O/read_arrays_buffers_solver.o \
+	$O/recompute_jacobian.o \
+	$O/save_regular_kernels.o \
+	$O/write_seismograms.o \
+	$O/write_output_ASCII.o \
+	$O/write_output_SAC.o \
+	$(EMPTY_MACRO)
+
+# solver objects with statically allocated arrays; dependent upon
+# values_from_mesher.h
+specfem3D_OBJECTS += \
+	$O/check_simulation_stability.o \
+	$O/compute_add_sources.o \
+	$O/compute_boundary_kernel.o \
+	$O/compute_coupling.o \
+	$O/compute_element.o \
+	$O/compute_forces_crust_mantle_noDev.o \
+	$O/compute_forces_crust_mantle_Dev.o \
+	$O/compute_forces_inner_core_noDev.o \
+	$O/compute_forces_inner_core_Dev.o \
+	$O/compute_forces_outer_core_noDev.o \
+	$O/compute_forces_outer_core_Dev.o \
+	$O/compute_kernels.o \
+	$O/compute_seismograms.o \
+	$O/compute_stacey_crust_mantle.o \
+	$O/compute_stacey_outer_core.o \
+	$O/fix_non_blocking_flags.o \
+	$O/initialize_simulation.o \
+	$O/noise_tomography.o \
+	$O/prepare_timerun.o \
+	$O/read_arrays_solver.o \
+	$O/read_forward_arrays.o \
+	$O/read_mesh_databases.o \
+	$O/save_forward_arrays.o \
+	$O/save_kernels.o \
+	$O/setup_sources_receivers.o \
+	$O/specfem3D.o \
+	$O/write_movie_volume.o \
+	$O/write_movie_surface.o \
+	$(EMPTY_MACRO)
+
+# These files come from the shared directory
+specfem3D_SHARED_OBJECTS = \
+	$O/auto_ner.o \
+	$O/broadcast_compute_parameters.o \
+	$O/calendar.o \
+	$O/count_number_of_sources.o \
+	$O/create_name_database.o \
+	$O/euler_angles.o \
+	$O/exit_mpi.o \
+	$O/force_ftz.o \
+	$O/get_model_parameters.o \
+	$O/get_value_parameters.o \
+	$O/gll_library.o \
+	$O/hex_nodes.o \
+	$O/intgrl.o \
+	$O/lagrange_poly.o \
+	$O/make_ellipticity.o \
+	$O/make_gravity.o \
+	$O/model_prem.o \
+	$O/model_topo_bathy.o \
+	$O/param_reader.o \
+	$O/read_compute_parameters.o \
+	$O/read_parameter_file.o \
+	$O/read_value_parameters.o \
+	$O/reduce.o \
+	$O/rthetaphi_xyz.o \
+	$O/spline_routines.o \
+	$O/write_c_binary.o \
+	$(EMPTY_MACRO)
+
+
+#######################################
+
+####
+#### rules for executables
+####
+
+${E}/xspecfem3D: $(specfem3D_OBJECTS) $(specfem3D_SHARED_OBJECTS)
+## use MPI here
+## DK DK add OpenMP compiler flag here if needed
+#	${MPIFCCOMPILE_CHECK} -qsmp=omp -o ${E}/xspecfem3D $(specfem3D_OBJECTS) $(specfem3D_SHARED_OBJECTS) $(MPILIBS)
+	${MPIFCCOMPILE_CHECK} -o ${E}/xspecfem3D $(specfem3D_OBJECTS) $(specfem3D_SHARED_OBJECTS) $(MPILIBS)
+
+#######################################
+
+## compilation directories
+S := ${S_TOP}/src/specfem3D
+$(specfem3D_OBJECTS): S = ${S_TOP}/src/specfem3D
+
+####
+#### rule for each .o file below
+####
+
+###
+### specfem3D - optimized flags and dependence on values from mesher here
+###
+$O/compute_add_sources.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_add_sources.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_add_sources.o ${FCFLAGS_f90} $S/compute_add_sources.f90
+
+$O/compute_boundary_kernel.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_boundary_kernel.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_boundary_kernel.o ${FCFLAGS_f90} $S/compute_boundary_kernel.f90
+
+$O/compute_coupling.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_coupling.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_coupling.o ${FCFLAGS_f90} $S/compute_coupling.f90
+
+$O/compute_element.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_element.F90
+	${FCCOMPILE_CHECK} -c -o $O/compute_element.o ${FCFLAGS_f90} $S/compute_element.F90
+
+$O/compute_forces_crust_mantle_noDev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_crust_mantle_noDev.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_forces_crust_mantle_noDev.o ${FCFLAGS_f90} $S/compute_forces_crust_mantle_noDev.f90
+
+$O/compute_forces_crust_mantle_Dev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_crust_mantle_Dev.F90
+## DK DK add OpenMP compiler flag here if needed
+#	${FCCOMPILE_CHECK} -c -qsmp=omp -o $O/compute_forces_crust_mantle_Dev.o ${FCFLAGS_f90} $S/compute_forces_crust_mantle_Dev.F90
+	${FCCOMPILE_CHECK} -c -o $O/compute_forces_crust_mantle_Dev.o ${FCFLAGS_f90} $S/compute_forces_crust_mantle_Dev.F90
+
+$O/compute_forces_outer_core_noDev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_outer_core_noDev.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_forces_outer_core_noDev.o ${FCFLAGS_f90} $S/compute_forces_outer_core_noDev.f90
+
+$O/compute_forces_outer_core_Dev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_outer_core_Dev.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_forces_outer_core_Dev.o ${FCFLAGS_f90} $S/compute_forces_outer_core_Dev.f90
+
+$O/compute_forces_inner_core_noDev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_inner_core_noDev.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_forces_inner_core_noDev.o ${FCFLAGS_f90} $S/compute_forces_inner_core_noDev.f90
+
+$O/compute_forces_inner_core_Dev.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_forces_inner_core_Dev.F90
+	${FCCOMPILE_CHECK} -c -o $O/compute_forces_inner_core_Dev.o ${FCFLAGS_f90} $S/compute_forces_inner_core_Dev.F90
+
+$O/compute_kernels.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_kernels.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_kernels.o ${FCFLAGS_f90} $S/compute_kernels.f90
+
+$O/compute_seismograms.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_seismograms.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_seismograms.o ${FCFLAGS_f90} $S/compute_seismograms.f90
+
+$O/compute_stacey_crust_mantle.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_stacey_crust_mantle.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_stacey_crust_mantle.o ${FCFLAGS_f90} $S/compute_stacey_crust_mantle.f90
+
+$O/compute_stacey_outer_core.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/compute_stacey_outer_core.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_stacey_outer_core.o ${FCFLAGS_f90} $S/compute_stacey_outer_core.f90
+
+$O/read_arrays_solver.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/read_arrays_solver.f90
+	${FCCOMPILE_CHECK} -c -o $O/read_arrays_solver.o ${FCFLAGS_f90} $S/read_arrays_solver.f90
+
+$O/read_forward_arrays.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/read_forward_arrays.f90
+	${FCCOMPILE_CHECK} -c -o $O/read_forward_arrays.o ${FCFLAGS_f90} $S/read_forward_arrays.f90
+
+$O/save_forward_arrays.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/save_forward_arrays.f90
+	${FCCOMPILE_CHECK} -c -o $O/save_forward_arrays.o ${FCFLAGS_f90} $S/save_forward_arrays.f90
+
+$O/save_kernels.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/save_kernels.f90
+	${FCCOMPILE_CHECK} -c -o $O/save_kernels.o ${FCFLAGS_f90} $S/save_kernels.f90
+
+$O/save_regular_kernels.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/save_regular_kernels.f90
+	${FCCOMPILE_CHECK} -c -o $O/save_regular_kernels.o ${FCFLAGS_f90} $S/save_regular_kernels.f90
+
+###
+### specfem3D - regular compilation options here
+###
+$O/comp_source_time_function.o: $S/comp_source_time_function.f90
+	${FCCOMPILE_CHECK} -c -o $O/comp_source_time_function.o ${FCFLAGS_f90} $S/comp_source_time_function.f90
+
+$O/comp_source_spectrum.o: ${SETUP}/constants.h $S/comp_source_spectrum.f90
+	${FCCOMPILE_CHECK} -c -o $O/comp_source_spectrum.o ${FCFLAGS_f90} $S/comp_source_spectrum.f90
+
+$O/compute_adj_source_frechet.o: ${SETUP}/constants.h $S/compute_adj_source_frechet.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_adj_source_frechet.o ${FCFLAGS_f90} $S/compute_adj_source_frechet.f90
+
+$O/compute_arrays_source.o: ${SETUP}/constants.h $S/compute_arrays_source.f90
+	${FCCOMPILE_CHECK} -c -o $O/compute_arrays_source.o ${FCFLAGS_f90} $S/compute_arrays_source.f90
+
+$O/convert_time.o: $S/convert_time.f90
+	${FCCOMPILE_CHECK} -c -o $O/convert_time.o ${FCFLAGS_f90} $S/convert_time.f90
+
+$O/define_derivation_matrices.o: ${SETUP}/constants.h $S/define_derivation_matrices.f90
+	${FCCOMPILE_CHECK} -c -o $O/define_derivation_matrices.o ${FCFLAGS_f90} $S/define_derivation_matrices.f90
+
+$O/get_attenuation.o: ${SETUP}/constants.h $S/get_attenuation.f90
+	${FCCOMPILE_CHECK} -c -o $O/get_attenuation.o ${FCFLAGS_f90} $S/get_attenuation.f90
+
+$O/get_backazimuth.o: ${SETUP}/constants.h $S/get_backazimuth.f90
+	${FCCOMPILE_CHECK} -c -o $O/get_backazimuth.o ${FCFLAGS_f90} $S/get_backazimuth.f90
+
+$O/get_cmt.o: ${SETUP}/constants.h $S/get_cmt.f90
+	${FCCOMPILE_CHECK} -c -o $O/get_cmt.o ${FCFLAGS_f90} $S/get_cmt.f90
+
+$O/multiply_arrays_source.o: ${SETUP}/constants.h $S/multiply_arrays_source.f90
+	${FCCOMPILE_CHECK} -c -o $O/multiply_arrays_source.o ${FCFLAGS_f90} $S/multiply_arrays_source.f90
+
+$O/netlib_specfun_erf.o: $S/netlib_specfun_erf.f90
+	${FCCOMPILE_CHECK} -c -o $O/netlib_specfun_erf.o ${FCFLAGS_f90} $S/netlib_specfun_erf.f90
+
+$O/recompute_jacobian.o: ${SETUP}/constants.h $S/recompute_jacobian.f90
+	${FCCOMPILE_CHECK} -c -o $O/recompute_jacobian.o ${FCFLAGS_f90} $S/recompute_jacobian.f90
+
+##
+## specfem3D - use MPI here & dependent on values from mesher here
+##
+$O/assemble_MPI_central_cube.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/assemble_MPI_central_cube.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/assemble_MPI_central_cube.o ${FCFLAGS_f90} $S/assemble_MPI_central_cube.f90
+
+$O/assemble_MPI_vector.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/assemble_MPI_vector.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/assemble_MPI_vector.o ${FCFLAGS_f90} $S/assemble_MPI_vector.f90
+
+$O/assemble_MPI_vector_block.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/assemble_MPI_vector_block.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/assemble_MPI_vector_block.o ${FCFLAGS_f90} $S/assemble_MPI_vector_block.f90
+
+$O/check_simulation_stability.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/check_simulation_stability.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/check_simulation_stability.o ${FCFLAGS_f90} $S/check_simulation_stability.f90
+
+$O/initialize_simulation.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/initialize_simulation.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/initialize_simulation.o ${FCFLAGS_f90} $S/initialize_simulation.f90
+
+$O/noise_tomography.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/noise_tomography.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/noise_tomography.o ${FCFLAGS_f90} $S/noise_tomography.f90
+
+$O/prepare_timerun.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/prepare_timerun.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/prepare_timerun.o ${FCFLAGS_f90} $S/prepare_timerun.f90
+
+$O/read_mesh_databases.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/read_mesh_databases.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/read_mesh_databases.o ${FCFLAGS_f90} $S/read_mesh_databases.f90
+
+$O/setup_sources_receivers.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/setup_sources_receivers.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/setup_sources_receivers.o ${FCFLAGS_f90} $S/setup_sources_receivers.f90
+
+$O/specfem3D.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/specfem3D.F90
+	${MPIFCCOMPILE_CHECK} -c -o $O/specfem3D.o ${FCFLAGS_f90} $S/specfem3D.F90
+
+$O/write_movie_surface.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/write_movie_surface.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/write_movie_surface.o ${FCFLAGS_f90} $S/write_movie_surface.f90
+
+$O/write_movie_volume.o: ${SETUP}/constants.h ${OUTPUT}/values_from_mesher.h $S/write_movie_volume.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/write_movie_volume.o ${FCFLAGS_f90} $S/write_movie_volume.f90
+
+##
+## specfem3D - non-dependent on values from mesher here
+##
+$O/assemble_MPI_central_cube_block.o: ${SETUP}/constants.h $S/assemble_MPI_central_cube_block.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/assemble_MPI_central_cube_block.o ${FCFLAGS_f90} $S/assemble_MPI_central_cube_block.f90
+
+$O/assemble_MPI_scalar.o: ${SETUP}/constants.h $S/assemble_MPI_scalar.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/assemble_MPI_scalar.o ${FCFLAGS_f90} $S/assemble_MPI_scalar.f90
+
+$O/assemble_MPI_scalar_block.o: ${SETUP}/constants.h $S/assemble_MPI_scalar_block.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/assemble_MPI_scalar_block.o ${FCFLAGS_f90} $S/assemble_MPI_scalar_block.f90
+
+$O/fix_non_blocking_flags.o: ${SETUP}/constants.h $S/fix_non_blocking_flags.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/fix_non_blocking_flags.o ${FCFLAGS_f90} $S/fix_non_blocking_flags.f90
+
+###
+### specfem3D - regular MPI compilation options here
+###
+$O/create_central_cube_buffers.o: ${SETUP}/constants.h $S/create_central_cube_buffers.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/create_central_cube_buffers.o ${FCFLAGS_f90} $S/create_central_cube_buffers.f90
+
+$O/get_event_info.o: ${SETUP}/constants.h $S/get_event_info.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/get_event_info.o ${FCFLAGS_f90} $S/get_event_info.f90
+
+$O/locate_receivers.o: ${SETUP}/constants.h $S/locate_receivers.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/locate_receivers.o ${FCFLAGS_f90} $S/locate_receivers.f90
+
+$O/locate_regular_points.o: ${SETUP}/constants.h $S/locate_regular_points.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/locate_regular_points.o ${FCFLAGS_f90} $S/locate_regular_points.f90
+
+$O/locate_sources.o: ${SETUP}/constants.h $S/locate_sources.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/locate_sources.o ${FCFLAGS_f90} $S/locate_sources.f90
+
+$O/read_arrays_buffers_solver.o: ${SETUP}/constants.h $S/read_arrays_buffers_solver.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/read_arrays_buffers_solver.o ${FCFLAGS_f90} $S/read_arrays_buffers_solver.f90
+
+$O/write_seismograms.o: ${SETUP}/constants.h $S/write_seismograms.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/write_seismograms.o ${FCFLAGS_f90} $S/write_seismograms.f90
+
+$O/write_output_ASCII.o: ${SETUP}/constants.h $S/write_output_ASCII.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/write_output_ASCII.o ${FCFLAGS_f90} $S/write_output_ASCII.f90
+
+$O/write_output_SAC.o: ${SETUP}/constants.h $S/write_output_SAC.f90
+	${MPIFCCOMPILE_CHECK} -c -o $O/write_output_SAC.o ${FCFLAGS_f90} $S/write_output_SAC.f90
+

Added: seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_regular_kernels.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_regular_kernels.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/branches/SPECFEM3D_GLOBE_SUNFLOWER/src/specfem3D/save_regular_kernels.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,549 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and CNRS / INRIA / University of Pau, France
+! (c) Princeton University and CNRS / INRIA / University of Pau
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine save_regular_kernels_crust_mantle(myrank, &
+                  npoints_slice, hxir_reg, hetar_reg, hgammar_reg, ispec_reg, &
+                  scale_t,scale_displ, &
+                  cijkl_kl_crust_mantle,rho_kl_crust_mantle, &
+                  alpha_kl_crust_mantle,beta_kl_crust_mantle, &
+                  ystore_crust_mantle,zstore_crust_mantle, &
+                  rhostore_crust_mantle,muvstore_crust_mantle, &
+                  kappavstore_crust_mantle,ibool_crust_mantle, &
+                  kappahstore_crust_mantle,muhstore_crust_mantle, &
+                  eta_anisostore_crust_mantle,ispec_is_tiso_crust_mantle, &
+              ! --idoubling_crust_mantle, &
+                  LOCAL_PATH)
+
+  implicit none
+
+  include "constants.h"
+  include "OUTPUT_FILES/values_from_mesher.h"
+
+  integer myrank
+
+  integer, intent(in) :: npoints_slice
+  real, dimension(NGLLX, NM_KL_REG_PTS), intent(in) :: hxir_reg
+  real, dimension(NGLLY, NM_KL_REG_PTS), intent(in) :: hetar_reg
+  real, dimension(NGLLZ, NM_KL_REG_PTS), intent(in) :: hgammar_reg
+  integer, dimension(NM_KL_REG_PTS), intent(in) :: ispec_reg
+
+  double precision :: scale_t,scale_displ
+
+  real(kind=CUSTOM_REAL), dimension(21,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+    cijkl_kl_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+    rho_kl_crust_mantle, beta_kl_crust_mantle, alpha_kl_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: &
+        ystore_crust_mantle,zstore_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
+        rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
+        kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle
+
+!  integer, dimension(NSPEC_CRUST_MANTLE) :: idoubling_crust_mantle
+  logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso_crust_mantle
+
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
+
+  character(len=150) LOCAL_PATH
+
+  ! local parameters
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: &
+    cijkl_kl_crust_mantle_reg
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
+    rho_kl_crust_mantle_reg, beta_kl_crust_mantle_reg, alpha_kl_crust_mantle_reg
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
+    mu_kl_crust_mantle, kappa_kl_crust_mantle, rhonotprime_kl_crust_mantle
+  real(kind=CUSTOM_REAL),dimension(21) ::  cijkl_kl_local
+  real(kind=CUSTOM_REAL) :: scale_kl,scale_kl_ani,scale_kl_rho
+  real(kind=CUSTOM_REAL) :: rhol,mul,kappal,rho_kl,alpha_kl,beta_kl
+  real(kind=CUSTOM_REAL) :: alphah_kl,alphav_kl,betah_kl,betav_kl,rhonotprime_kl
+  integer :: ispec,i,j,k,iglob
+  character(len=150) prname
+  double precision :: hlagrange
+  integer :: ipoint
+
+  ! transverse isotropic parameters
+  real(kind=CUSTOM_REAL), dimension(21) :: an_kl
+  real(kind=CUSTOM_REAL), dimension(:),allocatable :: &
+    alphav_kl_crust_mantle,alphah_kl_crust_mantle, &
+    betav_kl_crust_mantle,betah_kl_crust_mantle, &
+    eta_kl_crust_mantle
+
+  ! bulk parameterization
+  real(kind=CUSTOM_REAL), dimension(:),allocatable :: &
+    bulk_c_kl_crust_mantle,bulk_beta_kl_crust_mantle, &
+    bulk_betav_kl_crust_mantle,bulk_betah_kl_crust_mantle
+  real(kind=CUSTOM_REAL) :: A,C,F,L,N,eta
+  real(kind=CUSTOM_REAL) :: muvl,kappavl,muhl,kappahl
+  real(kind=CUSTOM_REAL) :: alphav_sq,alphah_sq,betav_sq,betah_sq,bulk_sq
+
+  ! scaling factors
+  scale_kl = scale_t/scale_displ * 1.d9
+  ! For anisotropic kernels
+  ! final unit : [s km^(-3) GPa^(-1)]
+  scale_kl_ani = scale_t**3 / (RHOAV*R_EARTH**3) * 1.d18
+  ! final unit : [s km^(-3) (kg/m^3)^(-1)]
+  scale_kl_rho = scale_t / scale_displ / RHOAV * 1.d9
+
+  ! allocates temporary arrays
+  allocate(cijkl_kl_crust_mantle_reg(21,npoints_slice), &
+           rho_kl_crust_mantle_reg(npoints_slice), &
+           beta_kl_crust_mantle_reg(npoints_slice), &
+           alpha_kl_crust_mantle_reg(npoints_slice))
+
+  if( SAVE_TRANSVERSE_KL ) then
+    ! transverse isotropic kernel arrays for file output
+    allocate(alphav_kl_crust_mantle(npoints_slice), &
+             alphah_kl_crust_mantle(npoints_slice), &
+             betav_kl_crust_mantle(npoints_slice), &
+             betah_kl_crust_mantle(npoints_slice), &
+             eta_kl_crust_mantle(npoints_slice))
+
+    ! isotropic kernel arrays for file output
+    allocate(bulk_c_kl_crust_mantle(npoints_slice), &
+             bulk_betav_kl_crust_mantle(npoints_slice), &
+             bulk_betah_kl_crust_mantle(npoints_slice), &
+             bulk_beta_kl_crust_mantle(npoints_slice))
+  endif
+
+  if( .not. ANISOTROPIC_KL ) then
+    ! allocates temporary isotropic kernel arrays for file output
+    allocate(bulk_c_kl_crust_mantle(npoints_slice), &
+             bulk_beta_kl_crust_mantle(npoints_slice))
+  endif
+
+  ! crust_mantle
+  do ipoint = 1, npoints_slice
+    ispec = ispec_reg(ipoint)
+    if (ANISOTROPIC_KL) then
+      if ( SAVE_TRANSVERSE_KL ) then
+        alphav_kl_crust_mantle(ipoint) = 0.0
+        alphah_kl_crust_mantle(ipoint) = 0.0
+        betav_kl_crust_mantle(ipoint) = 0.0
+        betah_kl_crust_mantle(ipoint) = 0.0
+        eta_kl_crust_mantle(ipoint) = 0.0
+        rho_kl_crust_mantle_reg(ipoint) = 0.0
+        bulk_c_kl_crust_mantle(ipoint) = 0.0
+        bulk_betav_kl_crust_mantle(ipoint) = 0.0
+        bulk_betah_kl_crust_mantle(ipoint) = 0.0
+        alpha_kl_crust_mantle_reg(ipoint) = 0.0
+        beta_kl_crust_mantle_reg(ipoint) = 0.0
+        bulk_beta_kl_crust_mantle(ipoint) = 0.0
+      else
+        rho_kl_crust_mantle_reg(ipoint) = 0.0
+        cijkl_kl_crust_mantle_reg(:,ipoint) = 0.0
+      endif
+    else
+      rhonotprime_kl_crust_mantle(ipoint) = 0.0
+      kappa_kl_crust_mantle(ipoint) = 0.0
+      mu_kl_crust_mantle(ipoint) = 0.0
+      rho_kl_crust_mantle_reg(ipoint) = 0.0
+      alpha_kl_crust_mantle_reg(ipoint) = 0.0
+      beta_kl_crust_mantle_reg(ipoint) = 0.0
+      bulk_c_kl_crust_mantle(ipoint) = 0.0
+      bulk_beta_kl_crust_mantle(ipoint) = 0.0
+    endif
+
+    do k = 1, NGLLZ
+      do j = 1, NGLLY
+        do i = 1, NGLLX
+
+          hlagrange = hxir_reg(i,ipoint)*hetar_reg(j,ipoint)*hgammar_reg(k,ipoint)
+
+          if (ANISOTROPIC_KL) then
+
+            ! For anisotropic kernels
+            iglob = ibool_crust_mantle(i,j,k,ispec)
+
+            ! The cartesian global cijkl_kl are rotated into the spherical local cijkl_kl
+            ! ystore and zstore are thetaval and phival (line 2252) -- dangerous
+            call rotate_kernels_dble(cijkl_kl_crust_mantle(:,i,j,k,ispec),cijkl_kl_local, &
+                                     ystore_crust_mantle(iglob),zstore_crust_mantle(iglob))
+
+            cijkl_kl_crust_mantle_reg(:,ipoint) = cijkl_kl_crust_mantle_reg(:,ipoint) &
+                                                + cijkl_kl_local * scale_kl_ani * hlagrange
+            rho_kl = rho_kl_crust_mantle(i,j,k,ispec) * scale_kl_rho * hlagrange
+
+            ! transverse isotropic kernel calculations
+            if( SAVE_TRANSVERSE_KL ) then
+              ! note: transverse isotropic kernels are calculated for all elements
+              !
+              !          however, the factors A,C,L,N,F are based on transversely isotropic values
+              !          only for elements between the Moho and the 220 km discontinuity;
+              !          otherwise they are taken from the isotropic values
+
+              rhol = rhostore_crust_mantle(i,j,k,ispec)
+
+              ! transverse isotropic parameters as in compute_forces_crust_mantle.f90
+              ! C=rhovpvsq A=rhovphsq L=rhovsvsq N=rhovshsq eta=F/(A - 2 L)
+
+              ! Get A,C,F,L,N,eta from kappa,mu
+              ! element can have transverse isotropy if between d220 and Moho
+              !if( .not. (TRANSVERSE_ISOTROPY_VAL .and. &
+              !    (idoubling_crust_mantle(ispec) == IFLAG_80_MOHO .or. &
+              !     idoubling_crust_mantle(ispec) == IFLAG_220_80))) then
+              if( .not. ispec_is_tiso_crust_mantle(ispec) ) then
+
+                ! layer with no transverse isotropy
+                ! A,C,L,N,F from isotropic model
+
+                mul = muvstore_crust_mantle(i,j,k,ispec)
+                kappal = kappavstore_crust_mantle(i,j,k,ispec)
+                muvl = mul
+                muhl = mul
+                kappavl = kappal
+                kappahl = kappal
+
+                A = kappal + FOUR_THIRDS * mul
+                C = A
+                L = mul
+                N = mul
+                F = kappal - 2._CUSTOM_REAL/3._CUSTOM_REAL * mul
+                eta = 1._CUSTOM_REAL
+
+              else
+
+                ! A,C,L,N,F from transverse isotropic model
+                kappavl = kappavstore_crust_mantle(i,j,k,ispec)
+                kappahl = kappahstore_crust_mantle(i,j,k,ispec)
+                muvl = muvstore_crust_mantle(i,j,k,ispec)
+                muhl = muhstore_crust_mantle(i,j,k,ispec)
+                kappal = kappavl
+
+                A = kappahl + FOUR_THIRDS * muhl
+                C = kappavl + FOUR_THIRDS * muvl
+                L = muvl
+                N = muhl
+                eta = eta_anisostore_crust_mantle(i,j,k,ispec)  ! that is  F / (A - 2 L)
+                F = eta * ( A - 2._CUSTOM_REAL * L )
+
+              endif
+
+              ! note: cijkl_kl_local() contains the fully anisotropic C_ij kernel components (non-dimensionalized)
+              !          for the GLL point at (i,j,k,ispec)
+
+              ! Purpose : compute the kernels for the An coeffs (an_kl)
+              ! from the kernels for Cij (cijkl_kl_local)
+              ! At r,theta,phi fixed
+              ! kernel def : dx = kij * dcij + krho * drho
+              !                = kAn * dAn  + krho * drho
+
+              ! Definition of the input array cij_kl :
+              ! cij_kl(1) = C11 ; cij_kl(2) = C12 ; cij_kl(3) = C13
+              ! cij_kl(4) = C14 ; cij_kl(5) = C15 ; cij_kl(6) = C16
+              ! cij_kl(7) = C22 ; cij_kl(8) = C23 ; cij_kl(9) = C24
+              ! cij_kl(10) = C25 ; cij_kl(11) = C26 ; cij_kl(12) = C33
+              ! cij_kl(13) = C34 ; cij_kl(14) = C35 ; cij_kl(15) = C36
+              ! cij_kl(16) = C44 ; cij_kl(17) = C45 ; cij_kl(18) = C46
+              ! cij_kl(19) = C55 ; cij_kl(20) = C56 ; cij_kl(21) = C66
+              ! where the Cij (Voigt's notation) are defined as function of
+              ! the components of the elastic tensor in spherical coordinates
+              ! by eq. (A.1) of Chen & Tromp, GJI 168 (2007)
+
+              ! From the relations giving Cij in function of An
+              ! Checked with Min Chen's results (routine build_cij)
+
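+              ! for example, in the transverse isotropic case C11 = C22 = A, C12 = A - 2 N,
+              ! C33 = C, C13 = C23 = F, C44 = C55 = L and C66 = N, so the chain rule gives
+              ! K_A = K_C11 + K_C12 + K_C22 and K_N = K_C66 - 2 K_C12, i.e. the combinations below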
+              an_kl(1) = cijkl_kl_local(1)+cijkl_kl_local(2)+cijkl_kl_local(7)  !A
+              an_kl(2) = cijkl_kl_local(12)                                     !C
+              an_kl(3) = -2*cijkl_kl_local(2)+cijkl_kl_local(21)                !N
+              an_kl(4) = cijkl_kl_local(16)+cijkl_kl_local(19)                  !L
+              an_kl(5) = cijkl_kl_local(3)+cijkl_kl_local(8)                    !F
+
+              ! not used yet
+              !an_kl(6)=2*cijkl_kl_local(5)+2*cijkl_kl_local(10)+2*cijkl_kl_local(14)          !Jc
+              !an_kl(7)=2*cijkl_kl_local(4)+2*cijkl_kl_local(9)+2*cijkl_kl_local(13)           !Js
+              !an_kl(8)=-2*cijkl_kl_local(14)                                  !Kc
+              !an_kl(9)=-2*cijkl_kl_local(13)                                  !Ks
+              !an_kl(10)=-2*cijkl_kl_local(10)+cijkl_kl_local(18)                      !Mc
+              !an_kl(11)=2*cijkl_kl_local(4)-cijkl_kl_local(20)                        !Ms
+              !an_kl(12)=cijkl_kl_local(1)-cijkl_kl_local(7)                           !Bc
+              !an_kl(13)=-1./2.*(cijkl_kl_local(6)+cijkl_kl_local(11))                 !Bs
+              !an_kl(14)=cijkl_kl_local(3)-cijkl_kl_local(8)                           !Hc
+              !an_kl(15)=-cijkl_kl_local(15)                                   !Hs
+              !an_kl(16)=-cijkl_kl_local(16)+cijkl_kl_local(19)                        !Gc
+              !an_kl(17)=-cijkl_kl_local(17)                                   !Gs
+              !an_kl(18)=cijkl_kl_local(5)-cijkl_kl_local(10)-cijkl_kl_local(18)               !Dc
+              !an_kl(19)=cijkl_kl_local(4)-cijkl_kl_local(9)+cijkl_kl_local(20)                !Ds
+              !an_kl(20)=cijkl_kl_local(1)-cijkl_kl_local(2)+cijkl_kl_local(7)-cijkl_kl_local(21)      !Ec
+              !an_kl(21)=-cijkl_kl_local(6)+cijkl_kl_local(11)                         !Es
+
+              ! K_rho (primary kernel, for a parameterization (A,C,L,N,F,rho) )
+              rhonotprime_kl = rhol * rho_kl / scale_kl_rho
+              rhonotprime_kl_crust_mantle(ipoint) = rhonotprime_kl_crust_mantle(ipoint) + rhonotprime_kl
+
+              ! note: transverse isotropic kernels are calculated for ALL elements,
+              !          and not just transverse elements
+              !
+              ! note: the kernels are for relative perturbations (delta ln (m_i) = (m_i - m_0)/m_i )
+              !
+              ! Gets transverse isotropic kernels
+              ! (see Appendix B of Sieminski et al., GJI 171, 2007)
+
+              ! for parameterization: ( alpha_v, alpha_h, beta_v, beta_h, eta, rho )
+              ! K_alpha_v
+              alphav_kl = 2*C*an_kl(2) * hlagrange
+              alphav_kl_crust_mantle(ipoint) = alphav_kl_crust_mantle(ipoint) &
+                                             + alphav_kl
+              ! K_alpha_h
+              alphah_kl = (2*A*an_kl(1) + 2*A*eta*an_kl(5)) * hlagrange
+              alphah_kl_crust_mantle(ipoint) = alphah_kl_crust_mantle(ipoint) &
+                                             + alphah_kl
+              ! K_beta_v
+              betav_kl = (2*L*an_kl(4) - 4*L*eta*an_kl(5)) * hlagrange
+              betav_kl_crust_mantle(ipoint) = betav_kl_crust_mantle(ipoint) &
+                                            + betav_kl
+              ! K_beta_h
+              betah_kl = 2*N*an_kl(3) * hlagrange
+              betah_kl_crust_mantle(ipoint) = betah_kl_crust_mantle(ipoint) &
+                                            + betah_kl
+              ! K_eta
+              eta_kl_crust_mantle(ipoint) = eta_kl_crust_mantle(ipoint) + F*an_kl(5) * hlagrange
+              ! K_rhoprime  (for a parameterization (alpha_v, ..., rho) )
+              rho_kl_crust_mantle_reg(ipoint) = rho_kl_crust_mantle_reg(ipoint) &
+                                              + (A*an_kl(1) + C*an_kl(2) &
+                                               + N*an_kl(3) + L*an_kl(4) + F*an_kl(5)) * hlagrange &
+                                              + rhonotprime_kl
+
+              ! for parameterization: ( bulk, beta_v, beta_h, eta, rho )
+              ! where kappa_v = kappa_h = kappa and bulk c = sqrt( kappa / rho )
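+              ! chain-rule note: alpha_v^2 = bulk^2 + 4/3 beta_v^2 (and similarly for alpha_h),
+              ! so dln(alpha_v) = (bulk^2/alpha_v^2) dln(bulk) + 4/3 (beta_v^2/alpha_v^2) dln(beta_v);
+              ! hence K_bulk collects (bulk^2/alpha^2) K_alpha from both alpha_v and alpha_h,
+              ! and K_beta_v, K_beta_h pick up the extra 4/3 (beta^2/alpha^2) K_alpha terms below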
+              betav_sq = muvl / rhol
+              betah_sq = muhl / rhol
+              alphav_sq = ( kappal + FOUR_THIRDS * muvl ) / rhol
+              alphah_sq = ( kappal + FOUR_THIRDS * muhl ) / rhol
+              bulk_sq = kappal / rhol
+
+              bulk_c_kl_crust_mantle(ipoint) = bulk_c_kl_crust_mantle(ipoint) + &
+                bulk_sq / alphav_sq * alphav_kl + &
+                bulk_sq / alphah_sq * alphah_kl
+
+              bulk_betah_kl_crust_mantle(ipoint) = bulk_betah_kl_crust_mantle(ipoint) + &
+                betah_kl + FOUR_THIRDS * betah_sq / alphah_sq * alphah_kl
+
+              bulk_betav_kl_crust_mantle(ipoint) = bulk_betav_kl_crust_mantle(ipoint) + &
+                betav_kl + FOUR_THIRDS * betav_sq / alphav_sq * alphav_kl
+              ! the rest, K_eta and K_rho are the same as above
+            else
+
+              rho_kl_crust_mantle_reg(ipoint) = rho_kl_crust_mantle_reg(ipoint) + rho_kl
+
+            endif ! SAVE_TRANSVERSE_KL
+
+          else
+
+            ! isotropic kernels
+
+            rhol = rhostore_crust_mantle(i,j,k,ispec)
+            mul = muvstore_crust_mantle(i,j,k,ispec)
+            kappal = kappavstore_crust_mantle(i,j,k,ispec)
+
+            ! kernel values for rho, kappa, mu (primary kernel values)
+            rho_kl = - rhol * rho_kl_crust_mantle(i,j,k,ispec)
+            alpha_kl = - kappal * alpha_kl_crust_mantle(i,j,k,ispec) ! note: alpha_kl corresponds to K_kappa
+            beta_kl =  - 2 * mul * beta_kl_crust_mantle(i,j,k,ispec) ! note: beta_kl corresponds to K_mu
+
+            ! for a parameterization: (rho,mu,kappa) "primary" kernels
+            rhonotprime_kl_crust_mantle(ipoint) = rhonotprime_kl_crust_mantle(ipoint) &
+                                                + rho_kl * scale_kl * hlagrange
+            mu_kl_crust_mantle(ipoint) = mu_kl_crust_mantle(ipoint) + beta_kl * scale_kl * hlagrange
+            kappa_kl_crust_mantle(ipoint) = kappa_kl_crust_mantle(ipoint) + alpha_kl * scale_kl * hlagrange
+
+            ! for a parameterization: (rho,alpha,beta)
+            ! kernels rho^prime, beta, alpha
+            rho_kl_crust_mantle_reg(ipoint) = rho_kl_crust_mantle_reg(ipoint) &
+                                            + (rho_kl + alpha_kl + beta_kl) * scale_kl * hlagrange
+            beta_kl_crust_mantle_reg(ipoint) = beta_kl_crust_mantle_reg(ipoint) + &
+              2._CUSTOM_REAL * (beta_kl - FOUR_THIRDS * mul * alpha_kl / kappal) * scale_kl * hlagrange
+            alpha_kl_crust_mantle_reg(ipoint) = alpha_kl_crust_mantle_reg(ipoint) + &
+              2._CUSTOM_REAL * (1 +  FOUR_THIRDS * mul / kappal) * alpha_kl * scale_kl * hlagrange
+
+            ! for a parameterization: (rho,bulk, beta)
+            ! where bulk wave speed is c = sqrt( kappa / rho)
+            ! note: rhoprime is the same as for (rho,alpha,beta) parameterization
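+            ! chain-rule note: kappa = rho * c^2 and mu = rho * beta^2, so at fixed rho
+            ! dln(kappa) = 2 dln(c) and dln(mu) = 2 dln(beta), hence K_c = 2 K_kappa and
+            ! K_beta = 2 K_mu in this parameterization, as used below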
+            bulk_c_kl_crust_mantle(ipoint) = bulk_c_kl_crust_mantle(ipoint) &
+                                           + 2._CUSTOM_REAL * alpha_kl * scale_kl * hlagrange
+            bulk_beta_kl_crust_mantle(ipoint) = bulk_beta_kl_crust_mantle(ipoint) &
+                                              + 2._CUSTOM_REAL * beta_kl * scale_kl * hlagrange
+
+          endif
+
+        enddo
+      enddo
+    enddo
+
+    ! do some transforms that are independent of GLL points
+    if (ANISOTROPIC_KL) then
+      if (SAVE_TRANSVERSE_KL) then
+        ! write the kernel in physical units (01/05/2006)
+        rhonotprime_kl_crust_mantle(ipoint) = - rhonotprime_kl_crust_mantle(ipoint) * scale_kl
+
+        alphav_kl_crust_mantle(ipoint) = - alphav_kl_crust_mantle(ipoint) * scale_kl
+        alphah_kl_crust_mantle(ipoint) = - alphah_kl_crust_mantle(ipoint) * scale_kl
+        betav_kl_crust_mantle(ipoint) = - betav_kl_crust_mantle(ipoint) * scale_kl
+        betah_kl_crust_mantle(ipoint) = - betah_kl_crust_mantle(ipoint) * scale_kl
+        eta_kl_crust_mantle(ipoint) = - eta_kl_crust_mantle(ipoint) * scale_kl
+        rho_kl_crust_mantle_reg(ipoint) = - rho_kl_crust_mantle_reg(ipoint) * scale_kl
+
+        ! to check: isotropic kernels from transverse isotropic ones
+        alpha_kl_crust_mantle_reg(ipoint) = alphav_kl_crust_mantle(ipoint) &
+                                          + alphah_kl_crust_mantle(ipoint)
+        beta_kl_crust_mantle_reg(ipoint) = betav_kl_crust_mantle(ipoint) &
+                                         + betah_kl_crust_mantle(ipoint)
+        !rho_kl_crust_mantle_reg(ipoint) = rho_kl_crust_mantle_reg(ipoint) &
+        !                                + rhonotprime_kl_crust_mantle(ipoint) &
+        !                                + alpha_kl_crust_mantle_reg(ipoint) &
+        !                                + beta_kl_crust_mantle_reg(ipoint)
+        bulk_beta_kl_crust_mantle(ipoint) = bulk_betah_kl_crust_mantle(ipoint) &
+                                          + bulk_betav_kl_crust_mantle(ipoint)
+      endif
+    endif
+  enddo
+
+  call create_name_database(prname,myrank,IREGION_CRUST_MANTLE,LOCAL_PATH)
+
+  ! For anisotropic kernels
+  if (ANISOTROPIC_KL) then
+
+    ! outputs transverse isotropic kernels only
+    if (SAVE_TRANSVERSE_KL) then
+      ! transverse isotropic kernels
+      ! (alpha_v, alpha_h, beta_v, beta_h, eta, rho ) parameterization
+      open(unit=27,file=trim(prname)//'alphav_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) alphav_kl_crust_mantle
+      close(27)
+      open(unit=27,file=trim(prname)//'alphah_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) alphah_kl_crust_mantle
+      close(27)
+      open(unit=27,file=trim(prname)//'betav_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) betav_kl_crust_mantle
+      close(27)
+      open(unit=27,file=trim(prname)//'betah_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) betah_kl_crust_mantle
+      close(27)
+      open(unit=27,file=trim(prname)//'eta_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) eta_kl_crust_mantle
+      close(27)
+      open(unit=27,file=trim(prname)//'rho_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) rho_kl_crust_mantle_reg
+      close(27)
+
+      ! in case one is interested in primary kernel K_rho
+      !open(unit=27,file=trim(prname)//'rhonotprime_kernel.bin',status='unknown',form='unformatted',action='write')
+      !write(27) rhonotprime_kl_crust_mantle
+      !close(27)
+
+      ! (bulk, beta_v, beta_h, eta, rho ) parameterization: K_eta and K_rho same as above
+      open(unit=27,file=trim(prname)//'bulk_c_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) bulk_c_kl_crust_mantle
+      close(27)
+      open(unit=27,file=trim(prname)//'bulk_betav_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) bulk_betav_kl_crust_mantle
+      close(27)
+      open(unit=27,file=trim(prname)//'bulk_betah_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) bulk_betah_kl_crust_mantle
+      close(27)
+
+      ! to check: isotropic kernels
+      open(unit=27,file=trim(prname)//'alpha_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) alpha_kl_crust_mantle_reg
+      close(27)
+      open(unit=27,file=trim(prname)//'beta_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) beta_kl_crust_mantle_reg
+      close(27)
+      open(unit=27,file=trim(prname)//'bulk_beta_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) bulk_beta_kl_crust_mantle
+      close(27)
+
+    else
+
+      ! fully anisotropic kernels
+      ! note: the C_ij and density kernels are not for relative perturbations (delta ln( m_i) = delta m_i / m_i),
+      !          but absolute perturbations (delta m_i = m_i - m_0)
+      open(unit=27,file=trim(prname)//'rho_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) - rho_kl_crust_mantle_reg
+      close(27)
+      open(unit=27,file=trim(prname)//'cijkl_kernel.bin',status='unknown',form='unformatted',action='write')
+      write(27) - cijkl_kl_crust_mantle_reg
+      close(27)
+
+    endif
+
+  else
+    ! primary kernels: (rho,kappa,mu) parameterization
+    open(unit=27,file=trim(prname)//'rhonotprime_kernel.bin',status='unknown',form='unformatted',action='write')
+    write(27) rhonotprime_kl_crust_mantle
+    close(27)
+    open(unit=27,file=trim(prname)//'kappa_kernel.bin',status='unknown',form='unformatted',action='write')
+    write(27) kappa_kl_crust_mantle
+    close(27)
+    open(unit=27,file=trim(prname)//'mu_kernel.bin',status='unknown',form='unformatted',action='write')
+    write(27) mu_kl_crust_mantle
+    close(27)
+
+    ! (rho, alpha, beta ) parameterization
+    open(unit=27,file=trim(prname)//'rho_kernel.bin',status='unknown',form='unformatted',action='write')
+    write(27) rho_kl_crust_mantle_reg
+    close(27)
+    open(unit=27,file=trim(prname)//'alpha_kernel.bin',status='unknown',form='unformatted',action='write')
+    write(27) alpha_kl_crust_mantle_reg
+    close(27)
+    open(unit=27,file=trim(prname)//'beta_kernel.bin',status='unknown',form='unformatted',action='write')
+    write(27) beta_kl_crust_mantle_reg
+    close(27)
+
+    ! (rho, bulk, beta ) parameterization, K_rho same as above
+    open(unit=27,file=trim(prname)//'bulk_c_kernel.bin',status='unknown',form='unformatted',action='write')
+    write(27) bulk_c_kl_crust_mantle
+    close(27)
+    open(unit=27,file=trim(prname)//'bulk_beta_kernel.bin',status='unknown',form='unformatted',action='write')
+    write(27) bulk_beta_kl_crust_mantle
+    close(27)
+
+  endif
+
+  ! cleans up temporary kernel arrays
+  if (SAVE_TRANSVERSE_KL) then
+    deallocate(alphav_kl_crust_mantle,alphah_kl_crust_mantle, &
+               betav_kl_crust_mantle,betah_kl_crust_mantle, &
+               eta_kl_crust_mantle)
+    deallocate(bulk_c_kl_crust_mantle,bulk_betah_kl_crust_mantle, &
+               bulk_betav_kl_crust_mantle,bulk_beta_kl_crust_mantle)
+  endif
+  if (.not. ANISOTROPIC_KL) then
+    deallocate(bulk_c_kl_crust_mantle,bulk_beta_kl_crust_mantle)
+  endif
+  deallocate(cijkl_kl_crust_mantle_reg, &
+             rho_kl_crust_mantle_reg, &
+             beta_kl_crust_mantle_reg, &
+             alpha_kl_crust_mantle_reg)
+
+  end subroutine save_regular_kernels_crust_mantle
+
+

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_acoustic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_acoustic.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_acoustic.F90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,347 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine compute_forces_acoustic()
+
+  use specfem_par
+  use specfem_par_crustmantle,only: displ_crust_mantle,b_displ_crust_mantle, &
+                                    ibool_crust_mantle,ibelm_bottom_crust_mantle
+  use specfem_par_innercore,only: displ_inner_core,b_displ_inner_core, &
+                                  ibool_inner_core,ibelm_top_inner_core
+  use specfem_par_outercore
+  implicit none
+
+  ! local parameters
+  real(kind=CUSTOM_REAL) :: time,b_time
+  ! non-blocking MPI
+  ! iphase = 1 computes the outer elements of the outer core,
+  ! iphase = 2 computes the inner elements of the outer core (former icall parameter)
+  integer :: iphase
+  logical :: phase_is_inner
+
+  ! compute internal forces in the fluid region
+  if(CUSTOM_REAL == SIZE_REAL) then
+    time = sngl((dble(it-1)*DT-t0)*scale_t_inv)
+  else
+    time = (dble(it-1)*DT-t0)*scale_t_inv
+  endif
+  if (SIMULATION_TYPE == 3) then
+    ! note on backward/reconstructed wavefields:
+    !       b_time for b_displ( it=1 ) corresponds to (NSTEP - 1)*DT - t0  (after Newmark scheme...)
+    !       as we start with saved wavefields b_displ( 1 ) <-> displ( NSTEP ) which correspond
+    !       to a time (NSTEP - (it-1) - 1)*DT - t0
+    !       for reconstructing the rotational contributions
+    if(CUSTOM_REAL == SIZE_REAL) then
+      b_time = sngl((dble(NSTEP-it)*DT-t0)*scale_t_inv)
+    else
+      b_time = (dble(NSTEP-it)*DT-t0)*scale_t_inv
+    endif
+  endif
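+  ! for example, at it = 1 this gives b_time = (NSTEP-1)*DT - t0, i.e. the forward time of the
+  ! last step it = NSTEP, while at it = NSTEP it gives b_time = -t0, the forward start time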
+
+  ! ****************************************************
+  !   big loop over all spectral elements in the fluid
+  ! ****************************************************
+
+  ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions
+  do iphase=1,2
+
+    ! first, iphase == 1 for points on MPI interfaces (thus outer elements)
+    ! second, iphase == 2 for points purely inside partition (thus inner elements)
+    !
+    ! compute all the outer elements first, then send out non-blocking MPI communication
+    ! and continue computing the inner elements (overlapping communication with computation)
+    if( iphase == 1 ) then
+      phase_is_inner = .false.
+    else
+      phase_is_inner = .true.
+    endif
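+    ! summary of the overlap: after the outer elements of iphase 1 are done, the interface
+    ! contributions are sent with non-blocking MPI; iphase 2 then computes the inner elements
+    ! while those messages are in flight and finally waits for and assembles the received values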
+
+    if( .not. GPU_MODE ) then
+      ! on CPU
+      if( USE_DEVILLE_PRODUCTS_VAL ) then
+        ! uses Deville et al. (2002) routine
+        call compute_forces_outer_core_Dev(time,deltat,two_omega_earth, &
+                                      NSPEC_OUTER_CORE_ROTATION,NGLOB_OUTER_CORE, &
+                                      A_array_rotation,B_array_rotation, &
+                                      displ_outer_core,accel_outer_core, &
+                                      div_displ_outer_core,phase_is_inner)
+      else
+        ! div_displ_outer_core is initialized to zero in the following subroutine.
+        call compute_forces_outer_core(time,deltat,two_omega_earth, &
+                                      NSPEC_OUTER_CORE_ROTATION,NGLOB_OUTER_CORE, &
+                                      A_array_rotation,B_array_rotation, &
+                                      displ_outer_core,accel_outer_core, &
+                                      div_displ_outer_core,phase_is_inner)
+      endif
+
+      ! adjoint / kernel runs
+      if (SIMULATION_TYPE == 3) then
+        if( USE_DEVILLE_PRODUCTS_VAL ) then
+          ! uses Deville et al. (2002) routine
+          call compute_forces_outer_core_Dev(b_time,b_deltat,b_two_omega_earth, &
+                                      NSPEC_OUTER_CORE_ROT_ADJOINT,NGLOB_OUTER_CORE_ADJOINT, &
+                                      b_A_array_rotation,b_B_array_rotation, &
+                                      b_displ_outer_core,b_accel_outer_core, &
+                                      b_div_displ_outer_core,phase_is_inner)
+        else
+          call compute_forces_outer_core(b_time,b_deltat,b_two_omega_earth, &
+                                      NSPEC_OUTER_CORE_ROT_ADJOINT,NGLOB_OUTER_CORE_ADJOINT, &
+                                      b_A_array_rotation,b_B_array_rotation, &
+                                      b_displ_outer_core,b_accel_outer_core, &
+                                      b_div_displ_outer_core,phase_is_inner)
+        endif
+      endif
+
+    else
+      ! on GPU
+      ! includes both forward and adjoint/kernel simulations
+      call compute_forces_outer_core_cuda(Mesh_pointer,iphase,time,b_time)
+    endif
+
+
+    ! computes additional contributions to acceleration field
+    if( iphase == 1 ) then
+
+       ! Stacey absorbing boundaries
+       if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_outer_core()
+
+       ! ****************************************************
+       ! **********  add matching with solid part  **********
+       ! ****************************************************
+       ! only for elements in first matching layer in the fluid
+       if( .not. GPU_MODE ) then
+          ! on CPU
+          !---
+          !--- couple with mantle at the top of the outer core
+          !---
+          if(ACTUALLY_COUPLE_FLUID_CMB) &
+               call compute_coupling_fluid_CMB(displ_crust_mantle,b_displ_crust_mantle, &
+               ibool_crust_mantle,ibelm_bottom_crust_mantle,  &
+               accel_outer_core,b_accel_outer_core, &
+               normal_top_outer_core,jacobian2D_top_outer_core, &
+               wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+               SIMULATION_TYPE,NSPEC2D_TOP(IREGION_OUTER_CORE))
+
+          !---
+          !--- couple with inner core at the bottom of the outer core
+          !---
+          if(ACTUALLY_COUPLE_FLUID_ICB) &
+               call compute_coupling_fluid_ICB(displ_inner_core,b_displ_inner_core, &
+               ibool_inner_core,ibelm_top_inner_core,  &
+               accel_outer_core,b_accel_outer_core, &
+               normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+               wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+               SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_OUTER_CORE))
+
+       else
+          ! on GPU
+          !---
+          !--- couple with mantle at the top of the outer core
+          !---
+          if( ACTUALLY_COUPLE_FLUID_CMB ) &
+               call compute_coupling_fluid_cmb_cuda(Mesh_pointer)
+          !---
+          !--- couple with inner core at the bottom of the outer core
+          !---
+          if( ACTUALLY_COUPLE_FLUID_ICB ) &
+               call compute_coupling_fluid_icb_cuda(Mesh_pointer)
+
+       endif
+    endif ! iphase == 1
+
+    ! assemble all the contributions between slices using MPI
+    ! in outer core
+    if( iphase == 1 ) then
+      ! sends out MPI interface data (non-blocking)
+
+      if(.NOT. GPU_MODE) then
+        ! on CPU
+        call assemble_MPI_scalar_s(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+                                accel_outer_core, &
+                                buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
+                                num_interfaces_outer_core,max_nibool_interfaces_oc, &
+                                nibool_interfaces_outer_core,ibool_interfaces_outer_core,&
+                                my_neighbours_outer_core, &
+                                request_send_scalar_oc,request_recv_scalar_oc)
+      else
+        ! on GPU
+        ! outer core
+        call assemble_MPI_scalar_send_cuda(NPROCTOT_VAL, &
+                                buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
+                                num_interfaces_outer_core,max_nibool_interfaces_oc, &
+                                nibool_interfaces_outer_core,&
+                                my_neighbours_outer_core, &
+                                request_send_scalar_oc,request_recv_scalar_oc, &
+                                1) ! <-- 1 == fwd accel
+      endif
+
+      ! adjoint simulations
+      if( SIMULATION_TYPE == 3 ) then
+        if(.NOT. GPU_MODE) then
+          ! on CPU
+          call assemble_MPI_scalar_s(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+                                b_accel_outer_core, &
+                                b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
+                                num_interfaces_outer_core,max_nibool_interfaces_oc, &
+                                nibool_interfaces_outer_core,ibool_interfaces_outer_core,&
+                                my_neighbours_outer_core, &
+                                b_request_send_scalar_oc,b_request_recv_scalar_oc)
+        else
+          ! on GPU
+          ! outer core
+          call assemble_MPI_scalar_send_cuda(NPROCTOT_VAL, &
+                                b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
+                                num_interfaces_outer_core,max_nibool_interfaces_oc, &
+                                nibool_interfaces_outer_core,&
+                                my_neighbours_outer_core, &
+                                b_request_send_scalar_oc,b_request_recv_scalar_oc, &
+                                3) ! <-- 3 == adjoint b_accel
+        endif ! GPU
+      endif ! SIMULATION_TYPE == 3
+
+    else
+      ! make sure the last communications are finished and processed
+      ! waits for send/receive requests to be completed and assembles values
+      if(.NOT. GPU_MODE) then
+        ! on CPU
+        call assemble_MPI_scalar_w(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+                                accel_outer_core, &
+                                buffer_recv_scalar_outer_core,num_interfaces_outer_core,&
+                                max_nibool_interfaces_oc, &
+                                nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
+                                request_send_scalar_oc,request_recv_scalar_oc)
+      else
+        ! on GPU
+        call assemble_MPI_scalar_write_cuda(NPROCTOT_VAL, &
+                                buffer_recv_scalar_outer_core, &
+                                num_interfaces_outer_core,max_nibool_interfaces_oc, &
+                                request_send_scalar_oc,request_recv_scalar_oc, &
+                                1) ! <-- 1 == fwd accel
+      endif
+
+      ! adjoint simulations
+      if( SIMULATION_TYPE == 3 ) then
+        if(.NOT. GPU_MODE) then
+          ! on CPU
+          call assemble_MPI_scalar_w(NPROCTOT_VAL,NGLOB_OUTER_CORE, &
+                                b_accel_outer_core, &
+                                b_buffer_recv_scalar_outer_core,num_interfaces_outer_core,&
+                                max_nibool_interfaces_oc, &
+                                nibool_interfaces_outer_core,ibool_interfaces_outer_core, &
+                                b_request_send_scalar_oc,b_request_recv_scalar_oc)
+        else
+          ! on GPU
+          call assemble_MPI_scalar_write_cuda(NPROCTOT_VAL, &
+                                b_buffer_recv_scalar_outer_core, &
+                                num_interfaces_outer_core,max_nibool_interfaces_oc, &
+                                b_request_send_scalar_oc,b_request_recv_scalar_oc, &
+                                3) ! <-- 3 == adjoint b_accel
+        endif
+      endif ! SIMULATION_TYPE == 3
+    endif ! iphase == 1
+
+  enddo ! iphase
+
+  ! Newmark time scheme:
+  ! corrector terms for fluid parts
+  ! (multiply by the inverse of the mass matrix and update velocity)
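+  ! in formula form, for each fluid point i:
+  !   accel(i) = accel(i) * rmass(i)   (rmass is assumed to already hold the inverted diagonal mass matrix)
+  !   veloc(i) = veloc(i) + deltatover2 * accel(i)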
+  if(.NOT. GPU_MODE) then
+    ! on CPU
+    call compute_forces_ac_update_veloc(NGLOB_OUTER_CORE,veloc_outer_core,accel_outer_core, &
+                                       deltatover2,rmass_outer_core)
+
+    ! adjoint / kernel runs
+    if (SIMULATION_TYPE == 3) &
+      call compute_forces_ac_update_veloc(NGLOB_OUTER_CORE_ADJOINT,b_veloc_outer_core,b_accel_outer_core, &
+                                         b_deltatover2,rmass_outer_core)
+
+  else
+    ! on GPU
+    call kernel_3_outer_core_cuda(Mesh_pointer, &
+                                 deltatover2,SIMULATION_TYPE,b_deltatover2)
+  endif
+
+  end subroutine compute_forces_acoustic
+
+!=====================================================================
+
+  subroutine compute_forces_ac_update_veloc(NGLOB,veloc_outer_core,accel_outer_core, &
+                                           deltatover2,rmass_outer_core)
+
+  use constants_solver,only: CUSTOM_REAL
+
+#ifdef _HANDOPT
+  use specfem_par,only: imodulo_NGLOB_OUTER_CORE
+#endif
+
+  implicit none
+
+  integer :: NGLOB
+
+  ! velocity potential
+  real(kind=CUSTOM_REAL), dimension(NGLOB) :: veloc_outer_core,accel_outer_core
+
+  ! mass matrix
+  real(kind=CUSTOM_REAL), dimension(NGLOB) :: rmass_outer_core
+
+  real(kind=CUSTOM_REAL) :: deltatover2
+
+  ! local parameters
+  integer :: i
+
+  ! Newmark time scheme
+  ! multiply by the inverse of the mass matrix and update velocity
+
+#ifdef _HANDOPT_NEWMARK
+! way 2:
+  ! outer core
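+  ! sketch of the hand-optimized variant: the update loop is unrolled by a factor of 3;
+  ! imodulo_NGLOB_OUTER_CORE presumably holds mod(NGLOB,3), so the short loop below treats
+  ! the remainder points and the unrolled loop then processes the rest three points at a time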
+  if(imodulo_NGLOB_OUTER_CORE >= 1) then
+    do i=1,imodulo_NGLOB_OUTER_CORE
+      accel_outer_core(i) = accel_outer_core(i)*rmass_outer_core(i)
+      veloc_outer_core(i) = veloc_outer_core(i) + deltatover2*accel_outer_core(i)
+    enddo
+  endif
+  do i=imodulo_NGLOB_OUTER_CORE+1,NGLOB,3
+    accel_outer_core(i) = accel_outer_core(i)*rmass_outer_core(i)
+    veloc_outer_core(i) = veloc_outer_core(i) + deltatover2*accel_outer_core(i)
+
+    accel_outer_core(i+1) = accel_outer_core(i+1)*rmass_outer_core(i+1)
+    veloc_outer_core(i+1) = veloc_outer_core(i+1) + deltatover2*accel_outer_core(i+1)
+
+    accel_outer_core(i+2) = accel_outer_core(i+2)*rmass_outer_core(i+2)
+    veloc_outer_core(i+2) = veloc_outer_core(i+2) + deltatover2*accel_outer_core(i+2)
+  enddo
+#else
+! way 1:
+  do i=1,NGLOB
+    accel_outer_core(i) = accel_outer_core(i)*rmass_outer_core(i)
+    veloc_outer_core(i) = veloc_outer_core(i) + deltatover2*accel_outer_core(i)
+  enddo
+#endif
+
+  end subroutine compute_forces_ac_update_veloc
+

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_crust_mantle.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,959 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine compute_forces_crust_mantle(NSPEC,NGLOB,NSPEC_ATT, &
+                                        deltat, &
+                                        displ_crust_mantle, &
+                                        veloc_crust_mantle, &
+                                        accel_crust_mantle, &
+                                        phase_is_inner, &
+                                        R_xx,R_yy,R_xy,R_xz,R_yz, &
+                                        epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                                        epsilondev_xz,epsilondev_yz, &
+                                        epsilon_trace_over_3, &
+                                        alphaval,betaval,gammaval, &
+                                        factor_common,vx,vy,vz,vnspec)
+
+  use constants_solver
+
+  use specfem_par,only: &
+    hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+    minus_gravity_table,density_table,minus_deriv_gravity_table
+
+  use specfem_par_crustmantle,only: &
+    xstore => xstore_crust_mantle,ystore => ystore_crust_mantle,zstore => zstore_crust_mantle, &
+    xix => xix_crust_mantle,xiy => xiy_crust_mantle,xiz => xiz_crust_mantle, &
+    etax => etax_crust_mantle,etay => etay_crust_mantle,etaz => etaz_crust_mantle, &
+    gammax => gammax_crust_mantle,gammay => gammay_crust_mantle,gammaz => gammaz_crust_mantle, &
+    kappavstore => kappavstore_crust_mantle,kappahstore => kappahstore_crust_mantle, &
+    muvstore => muvstore_crust_mantle,muhstore => muhstore_crust_mantle, &
+    eta_anisostore => eta_anisostore_crust_mantle, &
+    c11store => c11store_crust_mantle,c12store => c12store_crust_mantle,c13store => c13store_crust_mantle, &
+    c14store => c14store_crust_mantle,c15store => c15store_crust_mantle,c16store => c16store_crust_mantle, &
+    c22store => c22store_crust_mantle,c23store => c23store_crust_mantle,c24store => c24store_crust_mantle, &
+    c25store => c25store_crust_mantle,c26store => c26store_crust_mantle,c33store => c33store_crust_mantle, &
+    c34store => c34store_crust_mantle,c35store => c35store_crust_mantle,c36store => c36store_crust_mantle, &
+    c44store => c44store_crust_mantle,c45store => c45store_crust_mantle,c46store => c46store_crust_mantle, &
+    c55store => c55store_crust_mantle,c56store => c56store_crust_mantle,c66store => c66store_crust_mantle, &
+    ibool => ibool_crust_mantle, &
+    ispec_is_tiso => ispec_is_tiso_crust_mantle, &
+    one_minus_sum_beta => one_minus_sum_beta_crust_mantle, &
+    phase_ispec_inner => phase_ispec_inner_crust_mantle, &
+    nspec_outer => nspec_outer_crust_mantle, &
+    nspec_inner => nspec_inner_crust_mantle
+
+  implicit none
+
+  integer :: NSPEC,NGLOB,NSPEC_ATT
+
+  ! time step
+  real(kind=CUSTOM_REAL) :: deltat
+
+  ! displacement, velocity and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_crust_mantle
+
+  ! variable sized array variables
+  integer :: vx,vy,vz,vnspec
+
+  ! memory variables for attenuation
+  ! memory variables R_ij are stored at the local rather than global level
+  ! to allow for optimization of cache access by compiler
+  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: &
+    epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: epsilon_trace_over_3
+
+  ! [alpha,beta,gamma]val reduced to N_SLS and factor_common to N_SLS*NUM_NODES
+  real(kind=CUSTOM_REAL), dimension(N_SLS,vx,vy,vz,vnspec) :: factor_common
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
+
+  ! inner/outer element run flag
+  logical :: phase_is_inner
+
+  ! local parameters
+
+  ! for attenuation
+  real(kind=CUSTOM_REAL) one_minus_sum_beta_use,minus_sum_beta
+  real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ)   :: factor_common_c44_muv
+  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+
+  integer ispec,iglob,ispec_strain
+  integer i,j,k,l
+  integer i_SLS
+
+  ! the 21 coefficients for an anisotropic medium in reduced notation
+  real(kind=CUSTOM_REAL) c11,c22,c33,c44,c55,c66,c12,c13,c23,c14,c24,c34,c15,c25,c35,c45,c16,c26,c36,c46,c56
+
+  real(kind=CUSTOM_REAL) rhovphsq,sinphifour,cosphisq,sinphisq,costhetasq,rhovsvsq,sinthetasq, &
+        cosphifour,costhetafour,rhovpvsq,sinthetafour,rhovshsq,cosfourphi, &
+        costwotheta,cosfourtheta,sintwophisq,costheta,sinphi,sintheta,cosphi, &
+        sintwotheta,costwophi,sintwophi,costwothetasq,costwophisq,phi,theta
+
+  real(kind=CUSTOM_REAL) two_rhovsvsq,two_rhovshsq ! two_rhovpvsq,two_rhovphsq
+  real(kind=CUSTOM_REAL) four_rhovsvsq,four_rhovshsq ! four_rhovpvsq,four_rhovphsq,
+
+  real(kind=CUSTOM_REAL) twoetaminone,etaminone,eta_aniso
+  real(kind=CUSTOM_REAL) two_eta_aniso,four_eta_aniso,six_eta_aniso
+
+  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
+
+  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+
+  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
+
+  real(kind=CUSTOM_REAL) hp1,hp2,hp3
+  real(kind=CUSTOM_REAL) fac1,fac2,fac3
+  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
+  real(kind=CUSTOM_REAL) kappal,kappavl,kappahl,muvl,muhl
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
+
+  real(kind=CUSTOM_REAL) tempx1l,tempx2l,tempx3l
+  real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
+  real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
+
+  real(kind=CUSTOM_REAL) tempx1l_att,tempx2l_att,tempx3l_att
+  real(kind=CUSTOM_REAL) tempy1l_att,tempy2l_att,tempy3l_att
+  real(kind=CUSTOM_REAL) tempz1l_att,tempz2l_att,tempz3l_att
+
+  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
+  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
+
+  ! for gravity
+  integer int_radius
+  real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
+  double precision radius,rho,minus_g,minus_dg
+  double precision minus_g_over_radius,minus_dg_plus_g_over_radius
+  double precision cos_theta,sin_theta,cos_phi,sin_phi
+  double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
+  double precision factor,sx_l,sy_l,sz_l,gxl,gyl,gzl
+  double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
+
+!  integer :: computed_elements
+  integer :: num_elements,ispec_p
+  integer :: iphase
+
+! ****************************************************
+!   big loop over all spectral elements in the solid
+! ****************************************************
+
+!  computed_elements = 0
+  if( .not. phase_is_inner ) then
+    iphase = 1
+    num_elements = nspec_outer
+  else
+    iphase = 2
+    num_elements = nspec_inner
+  endif
+
+  do ispec_p = 1,num_elements
+
+    ispec = phase_ispec_inner(ispec_p,iphase)
+
+    ! only compute elements which belong to the current phase (inner or outer elements)
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempx2l = 0._CUSTOM_REAL
+          tempx3l = 0._CUSTOM_REAL
+
+          tempy1l = 0._CUSTOM_REAL
+          tempy2l = 0._CUSTOM_REAL
+          tempy3l = 0._CUSTOM_REAL
+
+          tempz1l = 0._CUSTOM_REAL
+          tempz2l = 0._CUSTOM_REAL
+          tempz3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            hp1 = hprime_xx(i,l)
+            iglob = ibool(l,j,k,ispec)
+            tempx1l = tempx1l + displ_crust_mantle(1,iglob)*hp1
+            tempy1l = tempy1l + displ_crust_mantle(2,iglob)*hp1
+            tempz1l = tempz1l + displ_crust_mantle(3,iglob)*hp1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+            hp2 = hprime_yy(j,l)
+            iglob = ibool(i,l,k,ispec)
+            tempx2l = tempx2l + displ_crust_mantle(1,iglob)*hp2
+            tempy2l = tempy2l + displ_crust_mantle(2,iglob)*hp2
+            tempz2l = tempz2l + displ_crust_mantle(3,iglob)*hp2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+            hp3 = hprime_zz(k,l)
+            iglob = ibool(i,j,l,ispec)
+            tempx3l = tempx3l + displ_crust_mantle(1,iglob)*hp3
+            tempy3l = tempy3l + displ_crust_mantle(2,iglob)*hp3
+            tempz3l = tempz3l + displ_crust_mantle(3,iglob)*hp3
+          enddo
+
+          if( ATTENUATION_VAL .and. COMPUTE_AND_STORE_STRAIN ) then
+             ! temporary variables used for fixing attenuation in a consistent way
+
+             tempx1l_att = tempx1l
+             tempx2l_att = tempx2l
+             tempx3l_att = tempx3l
+
+             tempy1l_att = tempy1l
+             tempy2l_att = tempy2l
+             tempy3l_att = tempy3l
+
+             tempz1l_att = tempz1l
+             tempz2l_att = tempz2l
+             tempz3l_att = tempz3l
+
+             if(ATTENUATION_NEW_VAL) then
+                ! uses the new attenuation routines:
+                ! a first-order Taylor expansion of the displacement is used for the local storage
+                ! of stresses at the current time step, to fix attenuation in a consistent way
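+                ! i.e. the gradients below are built from displ + deltat*veloc, a first-order
+                ! estimate u(t+deltat) ~ u(t) + deltat * v(t) of the displacement at the next step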
+                do l=1,NGLLX
+                   hp1 = hprime_xx(i,l)
+                   iglob = ibool(l,j,k,ispec)
+                   tempx1l_att = tempx1l_att + deltat*veloc_crust_mantle(1,iglob)*hp1
+                   tempy1l_att = tempy1l_att + deltat*veloc_crust_mantle(2,iglob)*hp1
+                   tempz1l_att = tempz1l_att + deltat*veloc_crust_mantle(3,iglob)*hp1
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+                   hp2 = hprime_yy(j,l)
+                   iglob = ibool(i,l,k,ispec)
+                   tempx2l_att = tempx2l_att + deltat*veloc_crust_mantle(1,iglob)*hp2
+                   tempy2l_att = tempy2l_att + deltat*veloc_crust_mantle(2,iglob)*hp2
+                   tempz2l_att = tempz2l_att + deltat*veloc_crust_mantle(3,iglob)*hp2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+                   hp3 = hprime_zz(k,l)
+                   iglob = ibool(i,j,l,ispec)
+                   tempx3l_att = tempx3l_att + deltat*veloc_crust_mantle(1,iglob)*hp3
+                   tempy3l_att = tempy3l_att + deltat*veloc_crust_mantle(2,iglob)*hp3
+                   tempz3l_att = tempz3l_att + deltat*veloc_crust_mantle(3,iglob)*hp3
+                enddo
+             endif
+          endif
+
+!         get derivatives of ux, uy and uz with respect to x, y and z
+
+          xixl = xix(i,j,k,ispec)
+          xiyl = xiy(i,j,k,ispec)
+          xizl = xiz(i,j,k,ispec)
+          etaxl = etax(i,j,k,ispec)
+          etayl = etay(i,j,k,ispec)
+          etazl = etaz(i,j,k,ispec)
+          gammaxl = gammax(i,j,k,ispec)
+          gammayl = gammay(i,j,k,ispec)
+          gammazl = gammaz(i,j,k,ispec)
+
+! compute the jacobian
+          jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+                        - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+                        + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+          duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+          duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+          duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+          duydxl = xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l
+          duydyl = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l
+          duydzl = xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l
+
+          duzdxl = xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l
+          duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l
+          duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l
+
+! precompute some sums to save CPU time
+          duxdxl_plus_duydyl = duxdxl + duydyl
+          duxdxl_plus_duzdzl = duxdxl + duzdzl
+          duydyl_plus_duzdzl = duydyl + duzdzl
+          duxdyl_plus_duydxl = duxdyl + duydxl
+          duzdxl_plus_duxdzl = duzdxl + duxdzl
+          duzdyl_plus_duydzl = duzdyl + duydzl
+
+          if( ATTENUATION_VAL .and. COMPUTE_AND_STORE_STRAIN ) then
+             ! temporary variables used for fixing attenuation in a consistent way
+             duxdxl_att = xixl*tempx1l_att + etaxl*tempx2l_att + gammaxl*tempx3l_att
+             duxdyl_att = xiyl*tempx1l_att + etayl*tempx2l_att + gammayl*tempx3l_att
+             duxdzl_att = xizl*tempx1l_att + etazl*tempx2l_att + gammazl*tempx3l_att
+
+             duydxl_att = xixl*tempy1l_att + etaxl*tempy2l_att + gammaxl*tempy3l_att
+             duydyl_att = xiyl*tempy1l_att + etayl*tempy2l_att + gammayl*tempy3l_att
+             duydzl_att = xizl*tempy1l_att + etazl*tempy2l_att + gammazl*tempy3l_att
+
+             duzdxl_att = xixl*tempz1l_att + etaxl*tempz2l_att + gammaxl*tempz3l_att
+             duzdyl_att = xiyl*tempz1l_att + etayl*tempz2l_att + gammayl*tempz3l_att
+             duzdzl_att = xizl*tempz1l_att + etazl*tempz2l_att + gammazl*tempz3l_att
+
+             ! precompute some sums to save CPU time
+             duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
+             duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
+             duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
+
+             ! compute deviatoric strain
+             if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
+                ispec_strain = 1
+             else
+                ispec_strain = ispec
+             endif
+             epsilon_trace_over_3(i,j,k,ispec_strain) = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
+             epsilondev_loc(1,i,j,k) = duxdxl_att - epsilon_trace_over_3(i,j,k,ispec_strain)
+             epsilondev_loc(2,i,j,k) = duydyl_att - epsilon_trace_over_3(i,j,k,ispec_strain)
+             epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl_att
+             epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
+             epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl_att
+          else
+             ! compute deviatoric strain
+             if (COMPUTE_AND_STORE_STRAIN) then
+                if(NSPEC_CRUST_MANTLE_STRAIN_ONLY == 1) then
+                   ispec_strain = 1
+                else
+                   ispec_strain = ispec
+                endif
+                epsilon_trace_over_3(i,j,k,ispec_strain) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+                epsilondev_loc(1,i,j,k) = duxdxl - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(2,i,j,k) = duydyl - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
+                epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
+                epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+             endif
+          endif
+
+          ! precompute terms for attenuation if needed
+          if( ATTENUATION_VAL ) then
+            if( USE_3D_ATTENUATION_ARRAYS ) then
+              one_minus_sum_beta_use = one_minus_sum_beta(i,j,k,ispec)
+            else
+              one_minus_sum_beta_use = one_minus_sum_beta(1,1,1,ispec)
+            endif
+            minus_sum_beta =  one_minus_sum_beta_use - 1.0_CUSTOM_REAL
+          endif
+
+          !
+          ! compute either isotropic or anisotropic elements
+          !
+
+          if(ANISOTROPIC_3D_MANTLE_VAL) then
+
+            c11 = c11store(i,j,k,ispec)
+            c12 = c12store(i,j,k,ispec)
+            c13 = c13store(i,j,k,ispec)
+            c14 = c14store(i,j,k,ispec)
+            c15 = c15store(i,j,k,ispec)
+            c16 = c16store(i,j,k,ispec)
+            c22 = c22store(i,j,k,ispec)
+            c23 = c23store(i,j,k,ispec)
+            c24 = c24store(i,j,k,ispec)
+            c25 = c25store(i,j,k,ispec)
+            c26 = c26store(i,j,k,ispec)
+            c33 = c33store(i,j,k,ispec)
+            c34 = c34store(i,j,k,ispec)
+            c35 = c35store(i,j,k,ispec)
+            c36 = c36store(i,j,k,ispec)
+            c44 = c44store(i,j,k,ispec)
+            c45 = c45store(i,j,k,ispec)
+            c46 = c46store(i,j,k,ispec)
+            c55 = c55store(i,j,k,ispec)
+            c56 = c56store(i,j,k,ispec)
+            c66 = c66store(i,j,k,ispec)
+
+            if(ATTENUATION_VAL) then
+              mul = c44
+              c11 = c11 + FOUR_THIRDS * minus_sum_beta * mul
+              c12 = c12 - TWO_THIRDS * minus_sum_beta * mul
+              c13 = c13 - TWO_THIRDS * minus_sum_beta * mul
+              c22 = c22 + FOUR_THIRDS * minus_sum_beta * mul
+              c23 = c23 - TWO_THIRDS * minus_sum_beta * mul
+              c33 = c33 + FOUR_THIRDS * minus_sum_beta * mul
+              c44 = c44 + minus_sum_beta * mul
+              c55 = c55 + minus_sum_beta * mul
+              c66 = c66 + minus_sum_beta * mul
+            endif
+
+            sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
+                       c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
+
+            sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
+                       c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
+
+            sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
+                       c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
+
+            sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
+                       c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
+
+            sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
+                       c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
+
+            sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
+                       c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
+
+          else
+
+          ! do not use transverse isotropy unless the element is between d220 and the Moho
+!            if(.not. (TRANSVERSE_ISOTROPY_VAL .and. (idoubling(ispec)==IFLAG_220_80 .or. idoubling(ispec)==IFLAG_80_MOHO))) then
+
+            if( .not. ispec_is_tiso(ispec) ) then
+
+              ! isotropic element
+
+              ! layer with no transverse isotropy, use kappav and muv
+              kappal = kappavstore(i,j,k,ispec)
+              mul = muvstore(i,j,k,ispec)
+
+              ! use unrelaxed parameters if attenuation
+              if(ATTENUATION_VAL) mul = mul * one_minus_sum_beta_use
+
+              lambdalplus2mul = kappal + FOUR_THIRDS * mul
+              lambdal = lambdalplus2mul - 2.*mul
+
+              ! compute stress sigma
+
+              sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
+              sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
+              sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
+
+              sigma_xy = mul*duxdyl_plus_duydxl
+              sigma_xz = mul*duzdxl_plus_duxdzl
+              sigma_yz = mul*duzdyl_plus_duydzl
+
+            else
+
+              ! transverse isotropic element
+
+              ! use Kappa and mu from transversely isotropic model
+              kappavl = kappavstore(i,j,k,ispec)
+              muvl = muvstore(i,j,k,ispec)
+
+              kappahl = kappahstore(i,j,k,ispec)
+              muhl = muhstore(i,j,k,ispec)
+
+              ! use unrelaxed parameters if attenuation
+              ! eta does not need to be shifted since it is a ratio
+              if(ATTENUATION_VAL) then
+                muvl = muvl * one_minus_sum_beta_use
+                muhl = muhl * one_minus_sum_beta_use
+              endif
+
+              rhovpvsq = kappavl + FOUR_THIRDS * muvl  !!! that is C
+              rhovphsq = kappahl + FOUR_THIRDS * muhl  !!! that is A
+
+              rhovsvsq = muvl  !!! that is L
+              rhovshsq = muhl  !!! that is N
+
+              eta_aniso = eta_anisostore(i,j,k,ispec)  !!! that is  F / (A - 2 L)
+
+              ! use mesh coordinates to get theta and phi
+              ! ystore and zstore contain theta and phi
+
+              iglob = ibool(i,j,k,ispec)
+              theta = ystore(iglob)
+              phi = zstore(iglob)
+
+              costheta = cos(theta)
+              sintheta = sin(theta)
+              cosphi = cos(phi)
+              sinphi = sin(phi)
+
+              costhetasq = costheta * costheta
+              sinthetasq = sintheta * sintheta
+              cosphisq = cosphi * cosphi
+              sinphisq = sinphi * sinphi
+
+              costhetafour = costhetasq * costhetasq
+              sinthetafour = sinthetasq * sinthetasq
+              cosphifour = cosphisq * cosphisq
+              sinphifour = sinphisq * sinphisq
+
+              costwotheta = cos(2.*theta)
+              sintwotheta = sin(2.*theta)
+              costwophi = cos(2.*phi)
+              sintwophi = sin(2.*phi)
+
+              cosfourtheta = cos(4.*theta)
+              cosfourphi = cos(4.*phi)
+
+              costwothetasq = costwotheta * costwotheta
+
+              costwophisq = costwophi * costwophi
+              sintwophisq = sintwophi * sintwophi
+
+              etaminone = eta_aniso - 1.
+              twoetaminone = 2. * eta_aniso - 1.
+
+              ! precompute some products to reduce the CPU time
+
+              two_eta_aniso = 2.*eta_aniso
+              four_eta_aniso = 4.*eta_aniso
+              six_eta_aniso = 6.*eta_aniso
+
+              !two_rhovpvsq = 2.*rhovpvsq
+              !two_rhovphsq = 2.*rhovphsq
+              two_rhovsvsq = 2.*rhovsvsq
+              two_rhovshsq = 2.*rhovshsq
+
+              !four_rhovpvsq = 4.*rhovpvsq
+              !four_rhovphsq = 4.*rhovphsq
+              four_rhovsvsq = 4.*rhovsvsq
+              four_rhovshsq = 4.*rhovshsq
+
+              ! the 21 anisotropic coefficients computed using Mathematica
+
+             c11 = rhovphsq*sinphifour + 2.*cosphisq*sinphisq* &
+               (rhovphsq*costhetasq + (eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)* &
+                  sinthetasq) + cosphifour* &
+               (rhovphsq*costhetafour + 2.*(eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)* &
+                  costhetasq*sinthetasq + rhovpvsq*sinthetafour)
+
+             c12 = ((rhovphsq - two_rhovshsq)*(3. + cosfourphi)*costhetasq)/4. - &
+              four_rhovshsq*cosphisq*costhetasq*sinphisq + &
+              (rhovphsq*(11. + 4.*costwotheta + cosfourtheta)*sintwophisq)/32. + &
+              eta_aniso*(rhovphsq - two_rhovsvsq)*(cosphifour + &
+                 2.*cosphisq*costhetasq*sinphisq + sinphifour)*sinthetasq + &
+              rhovpvsq*cosphisq*sinphisq*sinthetafour - &
+              rhovsvsq*sintwophisq*sinthetafour
+
+             c13 = (cosphisq*(rhovphsq + six_eta_aniso*rhovphsq + rhovpvsq - four_rhovsvsq - &
+                   12.*eta_aniso*rhovsvsq + (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - &
+                      four_eta_aniso*rhovsvsq)*cosfourtheta))/8. + &
+              sinphisq*(eta_aniso*(rhovphsq - two_rhovsvsq)*costhetasq + &
+                 (rhovphsq - two_rhovshsq)*sinthetasq)
+
+             c14 = costheta*sinphi*((cosphisq* &
+                   (-rhovphsq + rhovpvsq + four_rhovshsq - four_rhovsvsq + &
+                     (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq - &
+                        four_eta_aniso*rhovsvsq)*costwotheta))/2. + &
+                (etaminone*rhovphsq + 2.*(rhovshsq - eta_aniso*rhovsvsq))*sinphisq)* sintheta
+
+             c15 = cosphi*costheta*((cosphisq* (-rhovphsq + rhovpvsq + &
+                     (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)* &
+                      costwotheta))/2. + etaminone*(rhovphsq - two_rhovsvsq)*sinphisq)*sintheta
+
+             c16 = (cosphi*sinphi*(cosphisq* (-rhovphsq + rhovpvsq + &
+                     (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq - &
+                        four_eta_aniso*rhovsvsq)*costwotheta) + &
+                  2.*etaminone*(rhovphsq - two_rhovsvsq)*sinphisq)*sinthetasq)/2.
+
+             c22 = rhovphsq*cosphifour + 2.*cosphisq*sinphisq* &
+               (rhovphsq*costhetasq + (eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)* &
+                  sinthetasq) + sinphifour* &
+               (rhovphsq*costhetafour + 2.*(eta_aniso*rhovphsq + two_rhovsvsq - two_eta_aniso*rhovsvsq)* &
+                  costhetasq*sinthetasq + rhovpvsq*sinthetafour)
+
+             c23 = ((rhovphsq + six_eta_aniso*rhovphsq + rhovpvsq - four_rhovsvsq - 12.*eta_aniso*rhovsvsq + &
+                   (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)* &
+                    cosfourtheta)*sinphisq)/8. + &
+              cosphisq*(eta_aniso*(rhovphsq - two_rhovsvsq)*costhetasq + &
+                 (rhovphsq - two_rhovshsq)*sinthetasq)
+
+             c24 = costheta*sinphi*(etaminone*(rhovphsq - two_rhovsvsq)*cosphisq + &
+                ((-rhovphsq + rhovpvsq + (twoetaminone*rhovphsq - rhovpvsq + &
+                        four_rhovsvsq - four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)/2.)*sintheta
+
+             c25 = cosphi*costheta*((etaminone*rhovphsq + 2.*(rhovshsq - eta_aniso*rhovsvsq))* &
+                 cosphisq + ((-rhovphsq + rhovpvsq + four_rhovshsq - four_rhovsvsq + &
+                     (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq - &
+                        four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)/2.)*sintheta
+
+             c26 = (cosphi*sinphi*(2.*etaminone*(rhovphsq - two_rhovsvsq)*cosphisq + &
+                  (-rhovphsq + rhovpvsq + (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + four_rhovsvsq - &
+                        four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)*sinthetasq)/2.
+
+             c33 = rhovpvsq*costhetafour + 2.*(eta_aniso*(rhovphsq - two_rhovsvsq) + two_rhovsvsq)* &
+               costhetasq*sinthetasq + rhovphsq*sinthetafour
+
+             c34 = -((rhovphsq - rhovpvsq + (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq &
+                       - four_eta_aniso*rhovsvsq)*costwotheta)*sinphi*sintwotheta)/4.
+
+             c35 = -(cosphi*(rhovphsq - rhovpvsq + &
+                   (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)* &
+                    costwotheta)*sintwotheta)/4.
+
+             c36 = -((rhovphsq - rhovpvsq - four_rhovshsq + four_rhovsvsq + &
+                   (twoetaminone*rhovphsq - rhovpvsq + four_rhovsvsq - four_eta_aniso*rhovsvsq)* &
+                    costwotheta)*sintwophi*sinthetasq)/4.
+
+             c44 = cosphisq*(rhovsvsq*costhetasq + rhovshsq*sinthetasq) + &
+              sinphisq*(rhovsvsq*costwothetasq + &
+                 (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq + four_eta_aniso*rhovsvsq)*costhetasq* sinthetasq)
+
+             c45 = ((rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq - two_rhovshsq - two_rhovsvsq + &
+                  four_eta_aniso*rhovsvsq + (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq + &
+                     4.*etaminone*rhovsvsq)*costwotheta)*sintwophi*sinthetasq)/4.
+
+             c46 = -(cosphi*costheta*((rhovshsq - rhovsvsq)*cosphisq - &
+                  ((rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq - two_rhovshsq - two_rhovsvsq + &
+                       four_eta_aniso*rhovsvsq + (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + &
+                          four_rhovsvsq - four_eta_aniso*rhovsvsq)*costwotheta)*sinphisq)/2.)* sintheta)
+
+             c55 = sinphisq*(rhovsvsq*costhetasq + rhovshsq*sinthetasq) + &
+              cosphisq*(rhovsvsq*costwothetasq + &
+                 (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq + four_eta_aniso*rhovsvsq)*costhetasq* sinthetasq)
+
+             c56 = costheta*sinphi*((cosphisq* &
+                   (rhovphsq - two_eta_aniso*rhovphsq + rhovpvsq - two_rhovshsq - two_rhovsvsq + &
+                     four_eta_aniso*rhovsvsq + (-rhovphsq + two_eta_aniso*rhovphsq - rhovpvsq + &
+                        four_rhovsvsq - four_eta_aniso*rhovsvsq)*costwotheta))/2. + &
+                (-rhovshsq + rhovsvsq)*sinphisq)*sintheta
+
+             c66 = rhovshsq*costwophisq*costhetasq - &
+              2.*(rhovphsq - two_rhovshsq)*cosphisq*costhetasq*sinphisq + &
+              (rhovphsq*(11. + 4.*costwotheta + cosfourtheta)*sintwophisq)/32. - &
+              (rhovsvsq*(-6. - 2.*cosfourphi + cos(4.*phi - 2.*theta) - 2.*costwotheta + &
+                   cos(2.*(2.*phi + theta)))*sinthetasq)/8. + &
+              rhovpvsq*cosphisq*sinphisq*sinthetafour - &
+              (eta_aniso*(rhovphsq - two_rhovsvsq)*sintwophisq*sinthetafour)/2.
+
+             ! general expression of stress tensor for full Cijkl with 21 coefficients
+
+             sigma_xx = c11*duxdxl + c16*duxdyl_plus_duydxl + c12*duydyl + &
+                       c15*duzdxl_plus_duxdzl + c14*duzdyl_plus_duydzl + c13*duzdzl
+
+             sigma_yy = c12*duxdxl + c26*duxdyl_plus_duydxl + c22*duydyl + &
+                       c25*duzdxl_plus_duxdzl + c24*duzdyl_plus_duydzl + c23*duzdzl
+
+             sigma_zz = c13*duxdxl + c36*duxdyl_plus_duydxl + c23*duydyl + &
+                       c35*duzdxl_plus_duxdzl + c34*duzdyl_plus_duydzl + c33*duzdzl
+
+             sigma_xy = c16*duxdxl + c66*duxdyl_plus_duydxl + c26*duydyl + &
+                       c56*duzdxl_plus_duxdzl + c46*duzdyl_plus_duydzl + c36*duzdzl
+
+             sigma_xz = c15*duxdxl + c56*duxdyl_plus_duydxl + c25*duydyl + &
+                       c55*duzdxl_plus_duxdzl + c45*duzdyl_plus_duydzl + c35*duzdzl
+
+             sigma_yz = c14*duxdxl + c46*duxdyl_plus_duydxl + c24*duydyl + &
+                       c45*duzdxl_plus_duxdzl + c44*duzdyl_plus_duydzl + c34*duzdzl
+
+            endif
+
+          endif   ! end of test whether isotropic or anisotropic element
+
+          ! subtract memory variables if attenuation
+          if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. ) ) then
+             do i_SLS = 1,N_SLS
+                R_xx_val = R_xx(i_SLS,i,j,k,ispec)
+                R_yy_val = R_yy(i_SLS,i,j,k,ispec)
+                sigma_xx = sigma_xx - R_xx_val
+                sigma_yy = sigma_yy - R_yy_val
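+                ! the zz memory variable is not stored because the memory variables are traceless,
+                ! i.e. R_zz = -(R_xx + R_yy), hence the plus signs on the next line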
+                sigma_zz = sigma_zz + R_xx_val + R_yy_val
+                sigma_xy = sigma_xy - R_xy(i_SLS,i,j,k,ispec)
+                sigma_xz = sigma_xz - R_xz(i_SLS,i,j,k,ispec)
+                sigma_yz = sigma_yz - R_yz(i_SLS,i,j,k,ispec)
+             enddo
+          endif
+
+          ! define symmetric components of sigma for gravity
+          sigma_yx = sigma_xy
+          sigma_zx = sigma_xz
+          sigma_zy = sigma_yz
+
+          ! compute non-symmetric terms for gravity
+          if(GRAVITY_VAL) then
+
+            ! use mesh coordinates to get theta and phi
+            ! x y and z contain r theta and phi
+
+            iglob = ibool(i,j,k,ispec)
+            radius = dble(xstore(iglob))
+            theta = ystore(iglob)
+            phi = zstore(iglob)
+
+            cos_theta = dcos(dble(theta))
+            sin_theta = dsin(dble(theta))
+            cos_phi = dcos(dble(phi))
+            sin_phi = dsin(dble(phi))
+
+            ! get g, rho and dg/dr=dg
+            ! spherical components of the gravitational acceleration
+            ! for efficiency, the exact values are replaced by a lookup table sampled every 100 m in the radial direction
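+            ! (the mesh radius is non-dimensionalized by R_EARTH, so radius * R_EARTH_KM * 10 gives an index with 100 m spacing)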
+            int_radius = nint(radius * R_EARTH_KM * 10.d0)
+            minus_g = minus_gravity_table(int_radius)
+            minus_dg = minus_deriv_gravity_table(int_radius)
+            rho = density_table(int_radius)
+
+            ! Cartesian components of the gravitational acceleration
+            gxl = minus_g*sin_theta*cos_phi
+            gyl = minus_g*sin_theta*sin_phi
+            gzl = minus_g*cos_theta
+
+            ! Cartesian components of gradient of gravitational acceleration
+            ! obtained from spherical components
+
+            minus_g_over_radius = minus_g / radius
+            minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
+
+            cos_theta_sq = cos_theta**2
+            sin_theta_sq = sin_theta**2
+            cos_phi_sq = cos_phi**2
+            sin_phi_sq = sin_phi**2
+
+            Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq
+            Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq
+            Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq
+            Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq
+            Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
+            Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
+
+            iglob = ibool(i,j,k,ispec)
+
+            ! distinguish between single and double precision for reals
+            if(CUSTOM_REAL == SIZE_REAL) then
+
+              ! get displacement and multiply by density to compute G tensor
+              sx_l = rho * dble(displ_crust_mantle(1,iglob))
+              sy_l = rho * dble(displ_crust_mantle(2,iglob))
+              sz_l = rho * dble(displ_crust_mantle(3,iglob))
+
+              ! compute G tensor from s . g and add to sigma (not symmetric)
+              sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
+              sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
+              sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
+
+              sigma_xy = sigma_xy - sngl(sx_l * gyl)
+              sigma_yx = sigma_yx - sngl(sy_l * gxl)
+
+              sigma_xz = sigma_xz - sngl(sx_l * gzl)
+              sigma_zx = sigma_zx - sngl(sz_l * gxl)
+
+              sigma_yz = sigma_yz - sngl(sy_l * gzl)
+              sigma_zy = sigma_zy - sngl(sz_l * gyl)
+
+              ! precompute vector
+              factor = dble(jacobianl) * wgll_cube(i,j,k)
+              rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+              rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+              rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+
+            else
+
+              ! get displacement and multiply by density to compute G tensor
+              sx_l = rho * displ_crust_mantle(1,iglob)
+              sy_l = rho * displ_crust_mantle(2,iglob)
+              sz_l = rho * displ_crust_mantle(3,iglob)
+
+              ! compute G tensor from s . g and add to sigma (not symmetric)
+              sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+              sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+              sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
+
+              sigma_xy = sigma_xy - sx_l * gyl
+              sigma_yx = sigma_yx - sy_l * gxl
+
+              sigma_xz = sigma_xz - sx_l * gzl
+              sigma_zx = sigma_zx - sz_l * gxl
+
+              sigma_yz = sigma_yz - sy_l * gzl
+              sigma_zy = sigma_zy - sz_l * gyl
+
+              ! precompute vector
+              factor = jacobianl * wgll_cube(i,j,k)
+              rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+              rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+              rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+
+            endif
+
+          endif  ! end of section with gravity terms
+
+          ! form dot product with test vector, non-symmetric form
+          tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl)
+          tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl)
+          tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl)
+
+          tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl)
+          tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl)
+          tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl)
+
+          tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl)
+          tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl)
+          tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl)
+
+        enddo ! NGLLX
+      enddo ! NGLLY
+    enddo ! NGLLZ
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempy1l = 0._CUSTOM_REAL
+          tempz1l = 0._CUSTOM_REAL
+
+          tempx2l = 0._CUSTOM_REAL
+          tempy2l = 0._CUSTOM_REAL
+          tempz2l = 0._CUSTOM_REAL
+
+          tempx3l = 0._CUSTOM_REAL
+          tempy3l = 0._CUSTOM_REAL
+          tempz3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            fac1 = hprimewgll_xx(l,i)
+            tempx1l = tempx1l + tempx1(l,j,k)*fac1
+            tempy1l = tempy1l + tempy1(l,j,k)*fac1
+            tempz1l = tempz1l + tempz1(l,j,k)*fac1
+            ! the separate loops over l for the second (eta) and third (gamma) directions
+            ! are merged into this single loop because NGLLX = NGLLY = NGLLZ
+
+            fac2 = hprimewgll_yy(l,j)
+            tempx2l = tempx2l + tempx2(i,l,k)*fac2
+            tempy2l = tempy2l + tempy2(i,l,k)*fac2
+            tempz2l = tempz2l + tempz2(i,l,k)*fac2
+
+            fac3 = hprimewgll_zz(l,k)
+            tempx3l = tempx3l + tempx3(i,j,l)*fac3
+            tempy3l = tempy3l + tempy3(i,j,l)*fac3
+            tempz3l = tempz3l + tempz3(i,j,l)*fac3
+          enddo
+
+          fac1 = wgllwgll_yz(j,k)
+          fac2 = wgllwgll_xz(i,k)
+          fac3 = wgllwgll_xy(i,j)
+
+          sum_terms(1,i,j,k) = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l)
+          sum_terms(2,i,j,k) = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l)
+          sum_terms(3,i,j,k) = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l)
+
+          if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
+
+        enddo ! NGLLX
+      enddo ! NGLLY
+    enddo ! NGLLZ
+
+    ! sum contributions from each element to the global mesh and add gravity terms
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+          iglob = ibool(i,j,k,ispec)
+          accel_crust_mantle(:,iglob) = accel_crust_mantle(:,iglob) + sum_terms(:,i,j,k)
+        enddo
+      enddo
+    enddo
+
+! update memory variables based upon the Runge-Kutta scheme
+! convention for attenuation
+! term in xx = 1
+! term in yy = 2
+! term in xy = 3
+! term in xz = 4
+! term in yz = 5
+! term in zz not computed since zero trace
+! This is because we only implement Q_\mu attenuation and not Q_\kappa.
+! Note that this does *NOT* imply that there is no attenuation for P waves
+! because for Q_\kappa = infinity one gets (see for instance Dahlen and Tromp (1998)
+! equation (9.59) page 350): Q_\alpha = Q_\mu * 3 * (V_p/V_s)^2 / 4
+! therefore the P-wave attenuation 1/Q_\alpha is not zero; for instance for V_p / V_s = sqrt(3)
+! we get Q_\alpha = (9 / 4) * Q_\mu = 2.25 * Q_\mu
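(For reference, a minimal standalone sketch of the relation quoted above; the program name and the Q_mu value are illustrative only and do not appear in the solver.)

  program q_alpha_demo
    implicit none
    double precision :: Q_mu, vp_over_vs, Q_alpha
    Q_mu = 600.d0                    ! illustrative shear quality factor
    vp_over_vs = sqrt(3.d0)
    ! Dahlen and Tromp (1998), eq. (9.59), valid for Q_kappa = infinity
    Q_alpha = Q_mu * 3.d0 * vp_over_vs**2 / 4.d0
    print *, 'Q_alpha = ', Q_alpha   ! prints 2.25 * Q_mu = 1350 for this choice
  end program q_alpha_demo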
+
+    if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. )) then
+
+! use Runge-Kutta scheme to march in time
+      do i_SLS = 1,N_SLS
+
+! get coefficients for that standard linear solid
+! IMPROVE we use mu_v here even if there is some anisotropy
+! IMPROVE we should probably use an average value instead
+
+        ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
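+        ! (factor_common is stored either per GLL point when USE_3D_ATTENUATION_ARRAYS is set,
+        !  or only once per element at index (1,1,1) otherwise, hence the two branches below)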
+        if(USE_3D_ATTENUATION_ARRAYS) then
+          if(ANISOTROPIC_3D_MANTLE_VAL) then
+            factor_common_c44_muv(:,:,:) = factor_common(i_SLS,:,:,:,ispec) * c44store(:,:,:,ispec)
+          else
+            factor_common_c44_muv(:,:,:) = factor_common(i_SLS,:,:,:,ispec) * muvstore(:,:,:,ispec)
+          endif
+        else
+          if(ANISOTROPIC_3D_MANTLE_VAL) then
+            factor_common_c44_muv(:,:,:) = factor_common(i_SLS,1,1,1,ispec) * c44store(:,:,:,ispec)
+          else
+            factor_common_c44_muv(:,:,:) = factor_common(i_SLS,1,1,1,ispec) * muvstore(:,:,:,ispec)
+          endif
+        endif
+
+!        do i_memory = 1,5
+!          R_memory(i_memory,i_SLS,:,:,:,ispec) = alphaval(i_SLS) * &
+!                    R_memory(i_memory,i_SLS,:,:,:,ispec) + &
+!                    factor_common_c44_muv * &
+!                    (betaval(i_SLS) * epsilondev(i_memory,:,:,:,ispec) + &
+!                    gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
+!        enddo
+
+        R_xx(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xx(i_SLS,:,:,:,ispec) + factor_common_c44_muv(:,:,:) * &
+              (betaval(i_SLS) * epsilondev_xx(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(1,:,:,:))
+
+        R_yy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yy(i_SLS,:,:,:,ispec) + factor_common_c44_muv(:,:,:) * &
+              (betaval(i_SLS) * epsilondev_yy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(2,:,:,:))
+
+        R_xy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xy(i_SLS,:,:,:,ispec) + factor_common_c44_muv(:,:,:) * &
+              (betaval(i_SLS) * epsilondev_xy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(3,:,:,:))
+
+        R_xz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xz(i_SLS,:,:,:,ispec) + factor_common_c44_muv(:,:,:) * &
+              (betaval(i_SLS) * epsilondev_xz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(4,:,:,:))
+
+        R_yz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yz(i_SLS,:,:,:,ispec) + factor_common_c44_muv(:,:,:) * &
+              (betaval(i_SLS) * epsilondev_yz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(5,:,:,:))
+
+      enddo
+    endif
+
+    ! save deviatoric strain for Runge-Kutta scheme
+    if(COMPUTE_AND_STORE_STRAIN) then
+      !epsilondev(:,:,:,:,ispec) = epsilondev_loc(:,:,:,:)
+      do k=1,NGLLZ
+        do j=1,NGLLY
+          do i=1,NGLLX
+            epsilondev_xx(i,j,k,ispec) = epsilondev_loc(1,i,j,k)
+            epsilondev_yy(i,j,k,ispec) = epsilondev_loc(2,i,j,k)
+            epsilondev_xy(i,j,k,ispec) = epsilondev_loc(3,i,j,k)
+            epsilondev_xz(i,j,k,ispec) = epsilondev_loc(4,i,j,k)
+            epsilondev_yz(i,j,k,ispec) = epsilondev_loc(5,i,j,k)
+          enddo
+        enddo
+      enddo
+    endif
+
+  enddo   ! spectral element loop NSPEC_CRUST_MANTLE
+
+  end subroutine compute_forces_crust_mantle
+

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_elastic.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_elastic.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_elastic.F90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,856 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine compute_forces_elastic()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore,only: accel_outer_core,b_accel_outer_core, &
+                                  normal_top_outer_core,jacobian2D_top_outer_core, &
+                                  normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+                                  ibelm_top_outer_core,ibelm_bottom_outer_core, &
+                                  ibool_outer_core
+  use specfem_par_movie
+  implicit none
+
+  ! local parameters
+  ! non blocking MPI
+  ! iphase: iphase = 1 is for computing outer elements in the crust_mantle and inner_core regions,
+  !              iphase = 2 is for computing inner elements (former icall parameter)
+  integer :: iphase
+  logical :: phase_is_inner
+
+
+!daniel: att - debug
+!  integer :: iglob
+!  logical,parameter :: DEBUG = .false.
+!  if( DEBUG ) then
+!    iglob = ibool_crust_mantle(1,1,1,100)
+!    if( SIMULATION_TYPE == 1) then
+!      if( it == NSTEP .and. myrank == 0 ) then
+!        print*,'last step',it,'Rxx:',R_xx_crust_mantle(1,1,1,1,100),epsilondev_xx_crust_mantle(1,1,1,100), &
+!          displ_crust_mantle(1,iglob),displ_crust_mantle(2,iglob),displ_crust_mantle(3,iglob)
+!      endif
+!      if( it == NSTEP-1 .and. myrank == 0 ) then
+!        print*,'second last step',it,'Rxx:',R_xx_crust_mantle(1,1,1,1,100),epsilondev_xx_crust_mantle(1,1,1,100), &
+!          displ_crust_mantle(1,iglob),displ_crust_mantle(2,iglob),displ_crust_mantle(3,iglob)
+!      endif
+!      if( it == NSTEP-2 .and. myrank == 0 ) then
+!        print*,'third last step',it,'Rxx:',R_xx_crust_mantle(1,1,1,1,100),epsilondev_xx_crust_mantle(1,1,1,100), &
+!          displ_crust_mantle(1,iglob),displ_crust_mantle(2,iglob),displ_crust_mantle(3,iglob)
+!      endif
+!    elseif( SIMULATION_TYPE == 3 ) then
+!      if( it == 1 .and. myrank == 0 ) then
+!        print*,'first step',it,'Rxx:',b_R_xx_crust_mantle(1,1,1,1,100),b_epsilondev_xx_crust_mantle(1,1,1,100), &
+!          b_displ_crust_mantle(1,iglob),b_displ_crust_mantle(2,iglob),b_displ_crust_mantle(3,iglob)
+!      endif
+!      if( it == 2 .and. myrank == 0 ) then
+!        print*,'second step',it,'Rxx:',b_R_xx_crust_mantle(1,1,1,1,100),b_epsilondev_xx_crust_mantle(1,1,1,100), &
+!          b_displ_crust_mantle(1,iglob),b_displ_crust_mantle(2,iglob),b_displ_crust_mantle(3,iglob)
+!      endif
+!      if( it == 3 .and. myrank == 0 ) then
+!        print*,'third step',it,'Rxx:',b_R_xx_crust_mantle(1,1,1,1,100),b_epsilondev_xx_crust_mantle(1,1,1,100), &
+!          b_displ_crust_mantle(1,iglob),b_displ_crust_mantle(2,iglob),b_displ_crust_mantle(3,iglob)
+!      endif
+!    endif
+!  endif
+
+
+  ! ****************************************************
+  !   big loop over all spectral elements in the solid
+  ! ****************************************************
+
+  ! compute internal forces in the solid regions
+
+  ! for anisotropy and gravity, x y and z contain r theta and phi
+
+  ! distinguishes two runs: for elements on MPI interfaces, and elements within the partitions
+  do iphase = 1,2
+
+    ! first, iphase == 1 for points on MPI interfaces (thus outer elements)
+    ! second, iphase == 2 for points purely inside partition (thus inner elements)
+    !
+    ! compute all the outer elements first, then send out non-blocking MPI communication
+    ! and continue computing the inner elements while the messages are in flight (see the sketch below)
+    if( iphase == 1 ) then
+      phase_is_inner = .false.
+    else
+      phase_is_inner = .true.
+    endif
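(As an illustration of this two-phase overlap, here is a minimal standalone sketch of the non-blocking pattern; the ring exchange, buffer sizes and tags are made up for the example and are not the solver's actual assembly routines.)

  program overlap_sketch
    use mpi
    implicit none
    integer :: ierr, myrank, nproc, right, left, requests(2)
    real :: sendbuf(16), recvbuf(16)
    call MPI_Init(ierr)
    call MPI_Comm_rank(MPI_COMM_WORLD, myrank, ierr)
    call MPI_Comm_size(MPI_COMM_WORLD, nproc, ierr)
    right = mod(myrank + 1, nproc)
    left  = mod(myrank - 1 + nproc, nproc)
    sendbuf(:) = real(myrank)
    ! phase 1: compute the interface ("outer") elements, then post non-blocking communication
    call MPI_Irecv(recvbuf, 16, MPI_REAL, left,  0, MPI_COMM_WORLD, requests(1), ierr)
    call MPI_Isend(sendbuf, 16, MPI_REAL, right, 0, MPI_COMM_WORLD, requests(2), ierr)
    ! phase 2: compute the interior ("inner") elements while the messages are in flight
    call MPI_Waitall(2, requests, MPI_STATUSES_IGNORE, ierr)
    ! the received interface contributions would then be assembled into the acceleration vector
    call MPI_Finalize(ierr)
  end program overlap_sketch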
+
+
+    if( .NOT. GPU_MODE ) then
+       ! on CPU
+
+       ! compute internal forces in the solid regions
+       ! note: for anisotropy and gravity, x y and z contain r theta and phi
+       if( USE_DEVILLE_PRODUCTS_VAL ) then
+          ! uses Deville (2002) optimizations
+          ! crust/mantle region
+          call compute_forces_crust_mantle_Dev( NSPEC_CRUST_MANTLE_STR_OR_ATT,NGLOB_CRUST_MANTLE, &
+               NSPEC_CRUST_MANTLE_ATTENUAT, &
+               deltat, &
+               displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
+               phase_is_inner, &
+               R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle, &
+               R_xz_crust_mantle,R_yz_crust_mantle, &
+               epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+               epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+               eps_trace_over_3_crust_mantle, &
+               alphaval,betaval,gammaval,factor_common_crust_mantle, &
+               size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
+               size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5), .false. )
+          ! inner core region
+          call compute_forces_inner_core_Dev( NSPEC_INNER_CORE_STR_OR_ATT,NGLOB_INNER_CORE, &
+               NSPEC_INNER_CORE_ATTENUATION, &
+               deltat, &
+               displ_inner_core,veloc_inner_core,accel_inner_core, &
+               phase_is_inner, &
+               R_xx_inner_core,R_yy_inner_core,R_xy_inner_core,R_xz_inner_core,R_yz_inner_core, &
+               epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+               epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
+               eps_trace_over_3_inner_core,&
+               alphaval,betaval,gammaval, &
+               factor_common_inner_core, &
+               size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
+               size(factor_common_inner_core,4), size(factor_common_inner_core,5), .false. )
+
+       else
+          ! no Deville optimization
+          ! crust/mantle region
+          call compute_forces_crust_mantle(  NSPEC_CRUST_MANTLE_STR_OR_ATT,NGLOB_CRUST_MANTLE, &
+               NSPEC_CRUST_MANTLE_ATTENUAT, &
+               deltat, &
+               displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
+               phase_is_inner, &
+               R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle, &
+               R_xz_crust_mantle,R_yz_crust_mantle, &
+               epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+               epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+               eps_trace_over_3_crust_mantle, &
+               alphaval,betaval,gammaval,factor_common_crust_mantle, &
+               size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
+               size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
+          ! inner core region
+          call compute_forces_inner_core( NSPEC_INNER_CORE_STR_OR_ATT,NGLOB_INNER_CORE, &
+               NSPEC_INNER_CORE_ATTENUATION, &
+               deltat, &
+               displ_inner_core,veloc_inner_core,accel_inner_core, &
+               phase_is_inner, &
+               R_xx_inner_core,R_yy_inner_core,R_xy_inner_core,R_xz_inner_core,R_yz_inner_core, &
+               epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+               epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
+               eps_trace_over_3_inner_core,&
+               alphaval,betaval,gammaval, &
+               factor_common_inner_core, &
+               size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
+               size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
+
+       endif
+
+       ! adjoint / kernel runs
+       if (SIMULATION_TYPE == 3 ) then
+          if( USE_DEVILLE_PRODUCTS_VAL ) then
+             ! uses Deville (2002) optimizations
+             ! crust/mantle region
+             call compute_forces_crust_mantle_Dev( NSPEC_CRUST_MANTLE_ADJOINT,NGLOB_CRUST_MANTLE_ADJOINT, &
+                  NSPEC_CRUST_MANTLE_STR_AND_ATT, &
+                  b_deltat, &
+                  b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
+                  phase_is_inner, &
+                  b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
+                  b_R_xz_crust_mantle,b_R_yz_crust_mantle, &
+                  b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,&
+                  b_epsilondev_xy_crust_mantle, &
+                  b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
+                  b_eps_trace_over_3_crust_mantle, &
+                  b_alphaval,b_betaval,b_gammaval,factor_common_crust_mantle, &
+                  size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
+                  size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5), .true. )
+             ! inner core region
+             call compute_forces_inner_core_Dev( NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
+                  NSPEC_INNER_CORE_STR_AND_ATT, &
+                  b_deltat, &
+                  b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
+                  phase_is_inner, &
+                  b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
+                  b_R_xz_inner_core,b_R_yz_inner_core, &
+                  b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+                  b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
+                  b_eps_trace_over_3_inner_core,&
+                  b_alphaval,b_betaval,b_gammaval, &
+                  factor_common_inner_core, &
+                  size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
+                  size(factor_common_inner_core,4), size(factor_common_inner_core,5), .true. )
+
+          else
+             ! no Deville optimization
+             ! crust/mantle region
+             call compute_forces_crust_mantle( NSPEC_CRUST_MANTLE_ADJOINT,NGLOB_CRUST_MANTLE_ADJOINT, &
+                  NSPEC_CRUST_MANTLE_STR_AND_ATT, &
+                  b_deltat, &
+                  b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle, &
+                  phase_is_inner, &
+                  b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle, &
+                  b_R_xz_crust_mantle,b_R_yz_crust_mantle, &
+                  b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,&
+                  b_epsilondev_xy_crust_mantle, &
+                  b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle, &
+                  b_eps_trace_over_3_crust_mantle, &
+                  b_alphaval,b_betaval,b_gammaval,factor_common_crust_mantle, &
+                  size(factor_common_crust_mantle,2), size(factor_common_crust_mantle,3), &
+                  size(factor_common_crust_mantle,4), size(factor_common_crust_mantle,5) )
+
+             ! inner core region
+             call compute_forces_inner_core( NSPEC_INNER_CORE_ADJOINT,NGLOB_INNER_CORE_ADJOINT, &
+                  NSPEC_INNER_CORE_STR_AND_ATT, &
+                  b_deltat, &
+                  b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core, &
+                  phase_is_inner, &
+                  b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core, &
+                  b_R_xz_inner_core,b_R_yz_inner_core, &
+                  b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+                  b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core, &
+                  b_eps_trace_over_3_inner_core,&
+                  b_alphaval,b_betaval,b_gammaval, &
+                  factor_common_inner_core, &
+                  size(factor_common_inner_core,2), size(factor_common_inner_core,3), &
+                  size(factor_common_inner_core,4), size(factor_common_inner_core,5) )
+          endif
+       endif !SIMULATION_TYPE == 3
+
+    else
+       ! on GPU
+       ! contains both forward SIM_TYPE==1 and backward SIM_TYPE==3 simulations
+       ! for crust/mantle
+       call compute_forces_crust_mantle_cuda(Mesh_pointer,iphase)
+       ! for inner core
+       call compute_forces_inner_core_cuda(Mesh_pointer,iphase)
+    endif ! GPU_MODE
+
+
+    ! computes additional contributions to acceleration field
+    if( iphase == 1 ) then
+
+       ! absorbing boundaries
+       ! Stacey
+       if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) call compute_stacey_crust_mantle()
+
+       ! add the sources
+
+       ! add adjoint sources
+       if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
+          if( nadj_rec_local > 0 ) call compute_add_sources_adjoint()
+       endif
+
+       ! add the sources
+       select case( NOISE_TOMOGRAPHY )
+       case( 0 )
+          ! regular forward or backward simulation, no noise tomography simulation
+          ! adds sources for forward simulation
+          if (SIMULATION_TYPE == 1 .and. nsources_local > 0) &
+            call compute_add_sources()
+          ! add sources for backward/reconstructed wavefield
+          if (SIMULATION_TYPE == 3 .and. nsources_local > 0) &
+            call compute_add_sources_backward()
+
+       case( 1 )
+          ! the first step of noise tomography is to use |S(\omega)|^2 as a point force source at one of the receivers.
+          ! hence, instead of a moment tensor 'sourcearrays', a 'noise_sourcearray' for a point force is needed.
+          ! furthermore, the CMTSOLUTION needs to be zero, i.e., no earthquakes.
+          ! for now, this must be set manually by the user in DATA/CMTSOLUTION.
+          call noise_add_source_master_rec()
+
+       case( 2 )
+          ! second step of noise tomography, i.e., read the surface movie saved at every timestep
+          ! use the movie to drive the ensemble forward wavefield
+          call noise_read_add_surface_movie(NGLOB_CRUST_MANTLE,accel_crust_mantle,NSTEP-it+1)
+          ! be careful: the ensemble forward sources are time reversals of the generating wavefield "eta",
+          ! hence the "NSTEP-it+1", i.e., reading starts from the last timestep
+          ! note that the ensemble forward sources are generally distributed on the surface of the Earth,
+          ! i.e., the ensemble forward source is a surface force density, not a body force density;
+          ! therefore it must be added here, before applying the inverse of the mass matrix
+
+       case( 3 )
+          ! third step of noise tomography, i.e., read the surface movie saved at every timestep
+          ! use the movie to reconstruct the ensemble forward wavefield
+          ! the ensemble adjoint wavefield is done as usual
+          ! note that instead of "NSTEP-it+1" we now use "it", since reconstruction is a reversal of the reversal
+          call noise_read_add_surface_movie(NGLOB_CRUST_MANTLE_ADJOINT,b_accel_crust_mantle,it)
+
+       end select
+
+
+       ! ****************************************************
+       ! **********  add matching with fluid part  **********
+       ! ****************************************************
+       ! only for elements in first matching layer in the solid
+       if( .not. GPU_MODE ) then
+          ! on CPU
+          !---
+          !--- couple with outer core at the bottom of the mantle
+          !---
+          if(ACTUALLY_COUPLE_FLUID_CMB) &
+            call compute_coupling_CMB_fluid(displ_crust_mantle,b_displ_crust_mantle, &
+                                           accel_crust_mantle,b_accel_crust_mantle, &
+                                           ibool_crust_mantle,ibelm_bottom_crust_mantle,  &
+                                           accel_outer_core,b_accel_outer_core, &
+                                           normal_top_outer_core,jacobian2D_top_outer_core, &
+                                           wgllwgll_xy,ibool_outer_core,ibelm_top_outer_core, &
+                                           RHO_TOP_OC,minus_g_cmb, &
+                                           SIMULATION_TYPE,NSPEC2D_BOTTOM(IREGION_CRUST_MANTLE))
+
+          !---
+          !--- couple with outer core at the top of the inner core
+          !---
+          if(ACTUALLY_COUPLE_FLUID_ICB) &
+            call compute_coupling_ICB_fluid(displ_inner_core,b_displ_inner_core, &
+                                           accel_inner_core,b_accel_inner_core, &
+                                           ibool_inner_core,ibelm_top_inner_core,  &
+                                           accel_outer_core,b_accel_outer_core, &
+                                           normal_bottom_outer_core,jacobian2D_bottom_outer_core, &
+                                           wgllwgll_xy,ibool_outer_core,ibelm_bottom_outer_core, &
+                                           RHO_BOTTOM_OC,minus_g_icb, &
+                                           SIMULATION_TYPE,NSPEC2D_TOP(IREGION_INNER_CORE))
+
+       else
+          ! on GPU
+          !---
+          !--- couple with outer core at the bottom of the mantle
+          !---
+          if( ACTUALLY_COUPLE_FLUID_CMB ) &
+               call compute_coupling_cmb_fluid_cuda(Mesh_pointer)
+          !---
+          !--- couple with outer core at the top of the inner core
+          !---
+          if( ACTUALLY_COUPLE_FLUID_ICB ) &
+               call compute_coupling_icb_fluid_cuda(Mesh_pointer)
+
+       endif
+    endif ! iphase == 1
+
+    ! assemble all the contributions between slices using MPI
+
+    ! crust/mantle and inner core handled in the same call
+    ! in order to reduce the number of MPI messages by 2
+
+    if( iphase == 1 ) then
+      ! sends out MPI interface data
+      if(.NOT. GPU_MODE) then
+        ! on CPU
+        ! sends accel values to corresponding MPI interface neighbors
+        ! crust mantle
+        call assemble_MPI_vector_s(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+                      accel_crust_mantle, &
+                      buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
+                      num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+                      nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,&
+                      my_neighbours_crust_mantle, &
+                      request_send_vector_cm,request_recv_vector_cm)
+        ! inner core
+        call assemble_MPI_vector_s(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+                      accel_inner_core, &
+                      buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
+                      num_interfaces_inner_core,max_nibool_interfaces_ic, &
+                      nibool_interfaces_inner_core,ibool_interfaces_inner_core,&
+                      my_neighbours_inner_core, &
+                      request_send_vector_ic,request_recv_vector_ic)
+      else
+        ! on GPU
+        ! crust mantle
+        call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
+                      buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
+                      num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+                      nibool_interfaces_crust_mantle,&
+                      my_neighbours_crust_mantle, &
+                      request_send_vector_cm,request_recv_vector_cm, &
+                      IREGION_CRUST_MANTLE, &
+                      1) ! <-- 1 == fwd accel
+        ! inner core
+        call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
+                      buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
+                      num_interfaces_inner_core,max_nibool_interfaces_ic, &
+                      nibool_interfaces_inner_core,&
+                      my_neighbours_inner_core, &
+                      request_send_vector_ic,request_recv_vector_ic, &
+                      IREGION_INNER_CORE, &
+                      1)
+      endif ! GPU_MODE
+
+      ! adjoint / kernel runs
+      if (SIMULATION_TYPE == 3) then
+        if(.NOT. GPU_MODE) then
+          ! on CPU
+          ! sends accel values to corresponding MPI interface neighbors
+          ! crust mantle
+          call assemble_MPI_vector_s(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+                        b_accel_crust_mantle, &
+                        b_buffer_send_vector_cm,b_buffer_recv_vector_cm, &
+                        num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+                        nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle,&
+                        my_neighbours_crust_mantle, &
+                        b_request_send_vector_cm,b_request_recv_vector_cm)
+          ! inner core
+          call assemble_MPI_vector_s(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+                        b_accel_inner_core, &
+                        b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
+                        num_interfaces_inner_core,max_nibool_interfaces_ic, &
+                        nibool_interfaces_inner_core,ibool_interfaces_inner_core,&
+                        my_neighbours_inner_core, &
+                        b_request_send_vector_ic,b_request_recv_vector_ic)
+        else
+          ! on GPU
+          ! crust mantle
+          call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
+                      b_buffer_send_vector_cm,b_buffer_recv_vector_cm, &
+                      num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+                      nibool_interfaces_crust_mantle,&
+                      my_neighbours_crust_mantle, &
+                      b_request_send_vector_cm,b_request_recv_vector_cm, &
+                      IREGION_CRUST_MANTLE, &
+                      3) ! <-- 3 == adjoint b_accel
+          ! inner core
+          call assemble_MPI_vector_send_cuda(NPROCTOT_VAL, &
+                      b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
+                      num_interfaces_inner_core,max_nibool_interfaces_ic, &
+                      nibool_interfaces_inner_core,&
+                      my_neighbours_inner_core, &
+                      b_request_send_vector_ic,b_request_recv_vector_ic, &
+                      IREGION_INNER_CORE, &
+                      3)
+        endif ! GPU
+      endif ! SIMULATION_TYPE == 3
+
+    else
+      ! waits for send/receive requests to be completed and assembles values
+      if(.NOT. GPU_MODE) then
+        ! on CPU
+        ! crust mantle
+        call assemble_MPI_vector_w(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+                              accel_crust_mantle, &
+                              buffer_recv_vector_crust_mantle,num_interfaces_crust_mantle,&
+                              max_nibool_interfaces_cm, &
+                              nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
+                              request_send_vector_cm,request_recv_vector_cm)
+        ! inner core
+        call assemble_MPI_vector_w(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+                              accel_inner_core, &
+                              buffer_recv_vector_inner_core,num_interfaces_inner_core,&
+                              max_nibool_interfaces_ic, &
+                              nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
+                              request_send_vector_ic,request_recv_vector_ic)
+      else
+        ! on GPU
+        ! crust mantle
+        call assemble_MPI_vector_write_cuda(NPROCTOT_VAL, &
+                            buffer_recv_vector_crust_mantle, &
+                            num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+                            request_send_vector_cm,request_recv_vector_cm, &
+                            IREGION_CRUST_MANTLE, &
+                            1) ! <-- 1 == fwd accel
+        ! inner core
+        call assemble_MPI_vector_write_cuda(NPROCTOT_VAL, &
+                            buffer_recv_vector_inner_core, &
+                            num_interfaces_inner_core,max_nibool_interfaces_ic, &
+                            request_send_vector_ic,request_recv_vector_ic, &
+                            IREGION_INNER_CORE, &
+                            1)
+      endif
+
+
+      ! adjoint / kernel runs
+      if (SIMULATION_TYPE == 3) then
+        ! waits for send/receive requests to be completed and assembles values
+        if(.NOT. GPU_MODE) then
+          ! on CPU
+          ! crust mantle
+          call assemble_MPI_vector_w(NPROCTOT_VAL,NGLOB_CRUST_MANTLE, &
+                              b_accel_crust_mantle, &
+                              b_buffer_recv_vector_cm,num_interfaces_crust_mantle,&
+                              max_nibool_interfaces_cm, &
+                              nibool_interfaces_crust_mantle,ibool_interfaces_crust_mantle, &
+                              b_request_send_vector_cm,b_request_recv_vector_cm)
+          ! inner core
+          call assemble_MPI_vector_w(NPROCTOT_VAL,NGLOB_INNER_CORE, &
+                              b_accel_inner_core, &
+                              b_buffer_recv_vector_inner_core,num_interfaces_inner_core,&
+                              max_nibool_interfaces_ic, &
+                              nibool_interfaces_inner_core,ibool_interfaces_inner_core, &
+                              b_request_send_vector_ic,b_request_recv_vector_ic)
+
+        else
+          ! on GPU
+          ! crust mantle
+          call assemble_MPI_vector_write_cuda(NPROCTOT_VAL, &
+                            b_buffer_recv_vector_cm, &
+                            num_interfaces_crust_mantle,max_nibool_interfaces_cm, &
+                            b_request_send_vector_cm,b_request_recv_vector_cm, &
+                            IREGION_CRUST_MANTLE, &
+                            3) ! <-- 3 == adjoint b_accel
+          ! inner core
+          call assemble_MPI_vector_write_cuda(NPROCTOT_VAL,&
+                            b_buffer_recv_vector_inner_core, &
+                            num_interfaces_inner_core,max_nibool_interfaces_ic, &
+                            b_request_send_vector_ic,b_request_recv_vector_ic, &
+                            IREGION_INNER_CORE, &
+                            3)
+        endif
+      endif ! SIMULATION_TYPE == 3
+    endif ! iphase == 1
+
+  enddo ! iphase
+
+  ! updates (only) acceleration w/ rotation in the crust/mantle region (touches oceans)
+  if(.NOT. GPU_MODE) then
+     ! on CPU
+     call compute_forces_el_update_accel(NGLOB_CRUST_MANTLE,NGLOB_XY_CM,veloc_crust_mantle,accel_crust_mantle, &
+                                        two_omega_earth, &
+                                        rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle, &
+                                        NCHUNKS_VAL,ABSORBING_CONDITIONS)
+     ! adjoint / kernel runs
+     if (SIMULATION_TYPE == 3) &
+          call compute_forces_el_update_accel(NGLOB_CRUST_MANTLE_ADJOINT,NGLOB_XY_CM,b_veloc_crust_mantle,b_accel_crust_mantle, &
+                                             b_two_omega_earth, &
+                                             rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle, &
+                                             NCHUNKS_VAL,ABSORBING_CONDITIONS)
+  else
+     ! on GPU
+     call kernel_3_a_cuda(Mesh_pointer, &
+                         deltatover2,SIMULATION_TYPE,b_deltatover2,NCHUNKS_VAL)
+  endif
+
+  ! couples ocean with crust mantle
+  ! (updates acceleration with ocean load approximation)
+  if( OCEANS_VAL ) then
+    if(.NOT. GPU_MODE) then
+      ! on CPU
+      call compute_coupling_ocean(accel_crust_mantle,b_accel_crust_mantle, &
+             rmassx_crust_mantle, rmassy_crust_mantle, rmassz_crust_mantle, &
+             rmass_ocean_load,normal_top_crust_mantle, &
+             ibool_crust_mantle,ibelm_top_crust_mantle, &
+             updated_dof_ocean_load,NGLOB_XY_CM, &
+             SIMULATION_TYPE,NSPEC2D_TOP(IREGION_CRUST_MANTLE), &
+             ABSORBING_CONDITIONS)
+
+    else
+      ! on GPU
+      call compute_coupling_ocean_cuda(Mesh_pointer,NCHUNKS_VAL)
+    endif
+  endif
+
+  ! Newmark time scheme:
+  ! corrector terms for elastic parts
+  ! (updates velocity)
+  if(.NOT. GPU_MODE ) then
+    ! on CPU
+    call compute_forces_el_update_veloc(NGLOB_CRUST_MANTLE,veloc_crust_mantle,accel_crust_mantle, &
+                                       NGLOB_INNER_CORE,veloc_inner_core,accel_inner_core, &
+                                       deltatover2,two_omega_earth,rmass_inner_core)
+    ! adjoint / kernel runs
+    if (SIMULATION_TYPE == 3) &
+      call compute_forces_el_update_veloc(NGLOB_CRUST_MANTLE_ADJOINT,b_veloc_crust_mantle,b_accel_crust_mantle, &
+                                         NGLOB_INNER_CORE_ADJOINT,b_veloc_inner_core,b_accel_inner_core, &
+                                         b_deltatover2,b_two_omega_earth,rmass_inner_core)
+  else
+    ! on GPU
+    call kernel_3_b_cuda(Mesh_pointer, &
+                        deltatover2,SIMULATION_TYPE,b_deltatover2)
+  endif
+
+
+!daniel: att - debug
+!  if( DEBUG ) then
+!    if( SIMULATION_TYPE == 1) then
+!      if( it > NSTEP - 1000 .and. myrank == 0 ) then
+!        print*,'it',it,'Rxx:',R_xx_crust_mantle(1,1,1,1,100),epsilondev_xx_crust_mantle(1,1,1,100)
+!      endif
+!    elseif( SIMULATION_TYPE == 3 ) then
+!      if( it <= 1000 .and. myrank == 0 ) then
+!        print*,'it',it,'Rxx:',b_R_xx_crust_mantle(1,1,1,1,100),b_epsilondev_xx_crust_mantle(1,1,1,100)
+!      endif
+!    endif
+!  endif
+
+  end subroutine compute_forces_elastic
+
+
+!=====================================================================
+
+  subroutine compute_forces_el_update_accel(NGLOB,NGLOB_XY,veloc_crust_mantle,accel_crust_mantle, &
+                                           two_omega_earth, &
+                                           rmassx_crust_mantle,rmassy_crust_mantle,rmassz_crust_mantle, &
+                                           NCHUNKS_VAL,ABSORBING_CONDITIONS)
+
+  use constants_solver,only: CUSTOM_REAL,NDIM
+
+#ifdef _HANDOPT
+  use specfem_par,only: imodulo_NGLOB_CRUST_MANTLE4
+#endif
+
+  implicit none
+
+  integer :: NGLOB,NGLOB_XY,NCHUNKS_VAL
+
+  ! velocity & acceleration
+  ! crust/mantle region
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_crust_mantle,accel_crust_mantle
+
+  ! mass matrices
+  !
+  ! in the case of Stacey boundary conditions, a C*deltat/2 contribution is added to the mass matrix
+  ! on the Stacey edges for the crust_mantle and outer_core regions but not for the inner_core region;
+  ! thus the mass matrix must be replaced by three mass matrices that include the "C" damping matrix
+  !
+  ! if ABSORBING_CONDITIONS is not set, or if NCHUNKS = 6, only one mass matrix is needed:
+  ! for performance, only the "rmassz" array is then filled, and "rmassx" & "rmassy" are unused
+  real(kind=CUSTOM_REAL), dimension(NGLOB_XY) :: rmassx_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NGLOB_XY) :: rmassy_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NGLOB)    :: rmassz_crust_mantle
+
+  real(kind=CUSTOM_REAL) :: two_omega_earth
+
+  logical :: ABSORBING_CONDITIONS
+
+  ! local parameters
+  integer :: i
+
+  ! updates acceleration w/ rotation in crust/mantle region only
+
+  if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS) then
+
+#ifdef _HANDOPT_NEWMARK
+     ! way 2: manually unrolled by 4 (see the sketch after this subroutine)
+     if(imodulo_NGLOB_CRUST_MANTLE4 >= 1) then
+        do i=1,imodulo_NGLOB_CRUST_MANTLE4
+           accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmassx_crust_mantle(i) &
+                + two_omega_earth*veloc_crust_mantle(2,i)
+           accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmassy_crust_mantle(i) &
+                - two_omega_earth*veloc_crust_mantle(1,i)
+           accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
+        enddo
+     endif
+     do i=imodulo_NGLOB_CRUST_MANTLE4+1,NGLOB,4
+        accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmassx_crust_mantle(i) &
+             + two_omega_earth*veloc_crust_mantle(2,i)
+        accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmassy_crust_mantle(i) &
+             - two_omega_earth*veloc_crust_mantle(1,i)
+        accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
+
+        accel_crust_mantle(1,i+1) = accel_crust_mantle(1,i+1)*rmassx_crust_mantle(i+1) &
+             + two_omega_earth*veloc_crust_mantle(2,i+1)
+        accel_crust_mantle(2,i+1) = accel_crust_mantle(2,i+1)*rmassy_crust_mantle(i+1) &
+             - two_omega_earth*veloc_crust_mantle(1,i+1)
+        accel_crust_mantle(3,i+1) = accel_crust_mantle(3,i+1)*rmassz_crust_mantle(i+1)
+
+        accel_crust_mantle(1,i+2) = accel_crust_mantle(1,i+2)*rmassx_crust_mantle(i+2) &
+             + two_omega_earth*veloc_crust_mantle(2,i+2)
+        accel_crust_mantle(2,i+2) = accel_crust_mantle(2,i+2)*rmassy_crust_mantle(i+2) &
+             - two_omega_earth*veloc_crust_mantle(1,i+2)
+        accel_crust_mantle(3,i+2) = accel_crust_mantle(3,i+2)*rmassz_crust_mantle(i+2)
+
+        accel_crust_mantle(1,i+3) = accel_crust_mantle(1,i+3)*rmassx_crust_mantle(i+3) &
+             + two_omega_earth*veloc_crust_mantle(2,i+3)
+        accel_crust_mantle(2,i+3) = accel_crust_mantle(2,i+3)*rmassy_crust_mantle(i+3) &
+             - two_omega_earth*veloc_crust_mantle(1,i+3)
+        accel_crust_mantle(3,i+3) = accel_crust_mantle(3,i+3)*rmassz_crust_mantle(i+3)
+     enddo
+#else
+     ! way 1:
+     do i=1,NGLOB
+        accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmassx_crust_mantle(i) &
+             + two_omega_earth*veloc_crust_mantle(2,i)
+        accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmassy_crust_mantle(i) &
+             - two_omega_earth*veloc_crust_mantle(1,i)
+        accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
+     enddo
+#endif
+
+  else
+
+#ifdef _HANDOPT_NEWMARK
+     ! way 2:
+     if(imodulo_NGLOB_CRUST_MANTLE4 >= 1) then
+        do i=1,imodulo_NGLOB_CRUST_MANTLE4
+           accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmassz_crust_mantle(i) &
+                + two_omega_earth*veloc_crust_mantle(2,i)
+           accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmassz_crust_mantle(i) &
+                - two_omega_earth*veloc_crust_mantle(1,i)
+           accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
+        enddo
+     endif
+     do i=imodulo_NGLOB_CRUST_MANTLE4+1,NGLOB,4
+        accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmassz_crust_mantle(i) &
+             + two_omega_earth*veloc_crust_mantle(2,i)
+        accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmassz_crust_mantle(i) &
+             - two_omega_earth*veloc_crust_mantle(1,i)
+        accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
+
+        accel_crust_mantle(1,i+1) = accel_crust_mantle(1,i+1)*rmassz_crust_mantle(i+1) &
+             + two_omega_earth*veloc_crust_mantle(2,i+1)
+        accel_crust_mantle(2,i+1) = accel_crust_mantle(2,i+1)*rmassz_crust_mantle(i+1) &
+             - two_omega_earth*veloc_crust_mantle(1,i+1)
+        accel_crust_mantle(3,i+1) = accel_crust_mantle(3,i+1)*rmassz_crust_mantle(i+1)
+
+        accel_crust_mantle(1,i+2) = accel_crust_mantle(1,i+2)*rmassz_crust_mantle(i+2) &
+             + two_omega_earth*veloc_crust_mantle(2,i+2)
+        accel_crust_mantle(2,i+2) = accel_crust_mantle(2,i+2)*rmassz_crust_mantle(i+2) &
+             - two_omega_earth*veloc_crust_mantle(1,i+2)
+        accel_crust_mantle(3,i+2) = accel_crust_mantle(3,i+2)*rmassz_crust_mantle(i+2)
+
+        accel_crust_mantle(1,i+3) = accel_crust_mantle(1,i+3)*rmassz_crust_mantle(i+3) &
+             + two_omega_earth*veloc_crust_mantle(2,i+3)
+        accel_crust_mantle(2,i+3) = accel_crust_mantle(2,i+3)*rmassz_crust_mantle(i+3) &
+             - two_omega_earth*veloc_crust_mantle(1,i+3)
+        accel_crust_mantle(3,i+3) = accel_crust_mantle(3,i+3)*rmassz_crust_mantle(i+3)
+     enddo
+#else
+     ! way 1:
+     do i=1,NGLOB
+        accel_crust_mantle(1,i) = accel_crust_mantle(1,i)*rmassz_crust_mantle(i) &
+             + two_omega_earth*veloc_crust_mantle(2,i)
+        accel_crust_mantle(2,i) = accel_crust_mantle(2,i)*rmassz_crust_mantle(i) &
+             - two_omega_earth*veloc_crust_mantle(1,i)
+        accel_crust_mantle(3,i) = accel_crust_mantle(3,i)*rmassz_crust_mantle(i)
+     enddo
+#endif
+
+  endif
+
+  end subroutine compute_forces_el_update_accel
+
+
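A minimal standalone sketch of the "way 2" pattern used in the _HANDOPT_NEWMARK branch of the
subroutine above: a scalar prologue loop first consumes the NGLOB mod 4 remainder, then the main
loop is hand-unrolled by four. The names n, a and rmass below are hypothetical illustrations,
not identifiers from the solver:

  program demo_unroll4
    implicit none
    integer, parameter :: n = 10
    integer :: i, nrem
    real :: a(n), rmass(n)
    a = 1.0
    rmass = 0.5
    ! prologue: handle the n mod 4 remainder one point at a time
    nrem = mod(n,4)
    do i = 1,nrem
      a(i) = a(i)*rmass(i)
    enddo
    ! main loop: four points per iteration, same update applied to each of them
    do i = nrem+1,n,4
      a(i)   = a(i)  *rmass(i)
      a(i+1) = a(i+1)*rmass(i+1)
      a(i+2) = a(i+2)*rmass(i+2)
      a(i+3) = a(i+3)*rmass(i+3)
    enddo
    print *, a
  end program demo_unroll4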
+!=====================================================================
+
+  subroutine compute_forces_el_update_veloc(NGLOB_CM,veloc_crust_mantle,accel_crust_mantle, &
+                                            NGLOB_IC,veloc_inner_core,accel_inner_core, &
+                                            deltatover2,two_omega_earth,rmass_inner_core)
+
+  use constants_solver,only: CUSTOM_REAL,NDIM
+
+#ifdef _HANDOPT
+  use specfem_par,only: imodulo_NGLOB_CRUST_MANTLE4,imodulo_NGLOB_INNER_CORE
+#endif
+
+  implicit none
+
+  integer :: NGLOB_CM,NGLOB_IC
+
+  ! acceleration & velocity
+  ! crust/mantle region
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CM) :: veloc_crust_mantle,accel_crust_mantle
+  ! inner core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_IC) :: veloc_inner_core,accel_inner_core
+
+  ! mass matrix
+  real(kind=CUSTOM_REAL), dimension(NGLOB_IC) :: rmass_inner_core
+
+  real(kind=CUSTOM_REAL) :: deltatover2,two_omega_earth
+
+  ! local parameters
+  integer :: i
+
+  ! Newmark time scheme:
+  !
+  ! note:
+  !   - crust/mantle region
+  !         needs only velocity corrector terms
+  !         (acceleration already updated before)
+  !   - inner core region
+  !         needs both the acceleration update and the velocity corrector terms
+
+#ifdef _HANDOPT_NEWMARK
+! way 2:
+  ! crust/mantle region
+  if( imodulo_NGLOB_CRUST_MANTLE4 >= 1 ) then
+    do i=1,imodulo_NGLOB_CRUST_MANTLE4
+      veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
+    enddo
+  endif
+  do i=imodulo_NGLOB_CRUST_MANTLE4+1,NGLOB_CM,4
+    veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
+    veloc_crust_mantle(:,i+1) = veloc_crust_mantle(:,i+1) + deltatover2*accel_crust_mantle(:,i+1)
+    veloc_crust_mantle(:,i+2) = veloc_crust_mantle(:,i+2) + deltatover2*accel_crust_mantle(:,i+2)
+    veloc_crust_mantle(:,i+3) = veloc_crust_mantle(:,i+3) + deltatover2*accel_crust_mantle(:,i+3)
+  enddo
+
+  ! inner core region
+  if(imodulo_NGLOB_INNER_CORE >= 1) then
+    do i=1,imodulo_NGLOB_INNER_CORE
+      accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
+             + two_omega_earth*veloc_inner_core(2,i)
+      accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
+             - two_omega_earth*veloc_inner_core(1,i)
+      accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
+
+      veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
+    enddo
+  endif
+  do i=imodulo_NGLOB_INNER_CORE+1,NGLOB_IC,3
+    accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
+           + two_omega_earth*veloc_inner_core(2,i)
+    accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
+           - two_omega_earth*veloc_inner_core(1,i)
+    accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
+
+    veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
+
+    accel_inner_core(1,i+1) = accel_inner_core(1,i+1)*rmass_inner_core(i+1) &
+           + two_omega_earth*veloc_inner_core(2,i+1)
+    accel_inner_core(2,i+1) = accel_inner_core(2,i+1)*rmass_inner_core(i+1) &
+           - two_omega_earth*veloc_inner_core(1,i+1)
+    accel_inner_core(3,i+1) = accel_inner_core(3,i+1)*rmass_inner_core(i+1)
+
+    veloc_inner_core(:,i+1) = veloc_inner_core(:,i+1) + deltatover2*accel_inner_core(:,i+1)
+
+    accel_inner_core(1,i+2) = accel_inner_core(1,i+2)*rmass_inner_core(i+2) &
+           + two_omega_earth*veloc_inner_core(2,i+2)
+    accel_inner_core(2,i+2) = accel_inner_core(2,i+2)*rmass_inner_core(i+2) &
+           - two_omega_earth*veloc_inner_core(1,i+2)
+    accel_inner_core(3,i+2) = accel_inner_core(3,i+2)*rmass_inner_core(i+2)
+
+    veloc_inner_core(:,i+2) = veloc_inner_core(:,i+2) + deltatover2*accel_inner_core(:,i+2)
+  enddo
+#else
+! way 1:
+  ! mantle
+  do i=1,NGLOB_CM
+    veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) + deltatover2*accel_crust_mantle(:,i)
+  enddo
+  ! inner core
+  do i=1,NGLOB_IC
+    accel_inner_core(1,i) = accel_inner_core(1,i)*rmass_inner_core(i) &
+           + two_omega_earth*veloc_inner_core(2,i)
+    accel_inner_core(2,i) = accel_inner_core(2,i)*rmass_inner_core(i) &
+           - two_omega_earth*veloc_inner_core(1,i)
+    accel_inner_core(3,i) = accel_inner_core(3,i)*rmass_inner_core(i)
+
+    veloc_inner_core(:,i) = veloc_inner_core(:,i) + deltatover2*accel_inner_core(:,i)
+  enddo
+#endif
+
+  end subroutine compute_forces_el_update_veloc
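The velocity corrector applied here is, per global point, veloc <- veloc + deltatover2 * accel,
once the acceleration has been scaled by the inverse mass matrix and the Coriolis coupling has
been added. A minimal sketch of that corrector step on its own; the subroutine and argument
names below are hypothetical, not the solver's:

  subroutine newmark_velocity_corrector(nglob,deltatover2,accel,veloc)
    implicit none
    integer, intent(in) :: nglob
    real, intent(in) :: deltatover2
    real, intent(in) :: accel(3,nglob)
    real, intent(inout) :: veloc(3,nglob)
    integer :: i
    ! second half-step of the Newmark scheme: v <- v + (dt/2) * a
    do i = 1,nglob
      veloc(:,i) = veloc(:,i) + deltatover2*accel(:,i)
    enddo
  end subroutine newmark_velocity_corrector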

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_inner_core.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,713 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine compute_forces_inner_core( NSPEC,NGLOB,NSPEC_ATT, &
+                                        deltat, &
+                                        displ_inner_core, &
+                                        veloc_inner_core, &
+                                        accel_inner_core, &
+                                        phase_is_inner, &
+                                        R_xx,R_yy,R_xy,R_xz,R_yz, &
+                                        epsilondev_xx,epsilondev_yy,epsilondev_xy, &
+                                        epsilondev_xz,epsilondev_yz, &
+                                        epsilon_trace_over_3,&
+                                        alphaval,betaval,gammaval,factor_common, &
+                                        vx,vy,vz,vnspec)
+
+  use constants_solver
+
+  use specfem_par,only: &
+    hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+    minus_gravity_table,density_table,minus_deriv_gravity_table
+
+  use specfem_par_innercore,only: &
+    xstore => xstore_inner_core,ystore => ystore_inner_core,zstore => zstore_inner_core, &
+    xix => xix_inner_core,xiy => xiy_inner_core,xiz => xiz_inner_core, &
+    etax => etax_inner_core,etay => etay_inner_core,etaz => etaz_inner_core, &
+    gammax => gammax_inner_core,gammay => gammay_inner_core,gammaz => gammaz_inner_core, &
+    kappavstore => kappavstore_inner_core, &
+    muvstore => muvstore_inner_core, &
+    c11store => c11store_inner_core,c12store => c12store_inner_core,c13store => c13store_inner_core, &
+    c33store => c33store_inner_core,c44store => c44store_inner_core, &
+    ibool => ibool_inner_core,idoubling => idoubling_inner_core, &
+    one_minus_sum_beta => one_minus_sum_beta_inner_core, &
+    phase_ispec_inner => phase_ispec_inner_inner_core, &
+    nspec_outer => nspec_outer_inner_core, &
+    nspec_inner => nspec_inner_inner_core
+
+  implicit none
+
+  integer :: NSPEC,NGLOB,NSPEC_ATT
+
+  ! time step
+  real(kind=CUSTOM_REAL) deltat
+
+  ! displacement, velocity and acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: displ_inner_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: veloc_inner_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB) :: accel_inner_core
+
+  ! for attenuation
+  ! memory variables R_ij are stored at the local rather than global level
+  ! to allow for optimization of cache access by compiler
+  ! variable lengths for factor_common and one_minus_sum_beta
+
+  ! variable sized array variables
+  integer vx, vy, vz, vnspec
+
+  real(kind=CUSTOM_REAL), dimension(N_SLS,vx,vy,vz,vnspec) :: factor_common
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval,betaval,gammaval
+
+  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_ATT) :: R_xx,R_yy,R_xy,R_xz,R_yz
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: &
+    epsilondev_xx,epsilondev_yy,epsilondev_xy,epsilondev_xz,epsilondev_yz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: epsilon_trace_over_3
+
+  ! inner/outer element run flag
+  logical :: phase_is_inner
+
+  ! local parameters
+
+  real(kind=CUSTOM_REAL), dimension(5,NGLLX,NGLLY,NGLLZ) :: epsilondev_loc
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: factor_common_use
+  real(kind=CUSTOM_REAL) R_xx_val,R_yy_val
+  integer i_SLS
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: &
+    tempx1,tempx2,tempx3,tempy1,tempy2,tempy3,tempz1,tempz2,tempz3
+
+  integer ispec,iglob,ispec_strain
+  integer i,j,k,l
+
+  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) duxdxl,duxdyl,duxdzl,duydxl,duydyl,duydzl,duzdxl,duzdyl,duzdzl
+
+  real(kind=CUSTOM_REAL) duxdxl_plus_duydyl,duxdxl_plus_duzdzl,duydyl_plus_duzdzl
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl,duzdxl_plus_duxdzl,duzdyl_plus_duydzl
+
+  real(kind=CUSTOM_REAL) sigma_xx,sigma_yy,sigma_zz,sigma_xy,sigma_xz,sigma_yz
+
+  real(kind=CUSTOM_REAL) hp1,hp2,hp3
+  real(kind=CUSTOM_REAL) fac1,fac2,fac3
+  real(kind=CUSTOM_REAL) lambdal,mul,lambdalplus2mul
+  real(kind=CUSTOM_REAL) kappal
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: sum_terms
+
+  real(kind=CUSTOM_REAL) minus_sum_beta
+  real(kind=CUSTOM_REAL) c11l,c33l,c12l,c13l,c44l
+
+  real(kind=CUSTOM_REAL) tempx1l,tempx2l,tempx3l
+  real(kind=CUSTOM_REAL) tempy1l,tempy2l,tempy3l
+  real(kind=CUSTOM_REAL) tempz1l,tempz2l,tempz3l
+
+  real(kind=CUSTOM_REAL) tempx1l_att,tempx2l_att,tempx3l_att
+  real(kind=CUSTOM_REAL) tempy1l_att,tempy2l_att,tempy3l_att
+  real(kind=CUSTOM_REAL) tempz1l_att,tempz2l_att,tempz3l_att
+
+  real(kind=CUSTOM_REAL) duxdxl_att,duxdyl_att,duxdzl_att,duydxl_att
+  real(kind=CUSTOM_REAL) duydyl_att,duydzl_att,duzdxl_att,duzdyl_att,duzdzl_att
+  real(kind=CUSTOM_REAL) duxdyl_plus_duydxl_att,duzdxl_plus_duxdzl_att,duzdyl_plus_duydzl_att
+
+  ! for gravity
+  double precision radius,rho,minus_g,minus_dg
+  double precision minus_g_over_radius,minus_dg_plus_g_over_radius
+  double precision cos_theta,sin_theta,cos_phi,sin_phi
+  double precision cos_theta_sq,sin_theta_sq,cos_phi_sq,sin_phi_sq
+  double precision theta,phi,factor,gxl,gyl,gzl,sx_l,sy_l,sz_l
+  double precision Hxxl,Hyyl,Hzzl,Hxyl,Hxzl,Hyzl
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NGLLZ) :: rho_s_H
+  real(kind=CUSTOM_REAL) sigma_yx,sigma_zx,sigma_zy
+  integer :: int_radius
+
+!  integer :: computed_elements
+  integer :: num_elements,ispec_p
+  integer :: iphase
+
+! ****************************************************
+!   big loop over all spectral elements in the solid
+! ****************************************************
+
+!  computed_elements = 0
+  if( .not. phase_is_inner ) then
+    iphase = 1
+    num_elements = nspec_outer
+  else
+    iphase = 2
+    num_elements = nspec_inner
+  endif
+
+  do ispec_p = 1,num_elements
+
+    ispec = phase_ispec_inner(ispec_p,iphase)
+
+    ! only compute elements which belong to the current phase (inner or outer elements)
+
+    ! exclude fictitious elements in central cube
+    if(idoubling(ispec) /= IFLAG_IN_FICTITIOUS_CUBE) then
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempx2l = 0._CUSTOM_REAL
+          tempx3l = 0._CUSTOM_REAL
+
+          tempy1l = 0._CUSTOM_REAL
+          tempy2l = 0._CUSTOM_REAL
+          tempy3l = 0._CUSTOM_REAL
+
+          tempz1l = 0._CUSTOM_REAL
+          tempz2l = 0._CUSTOM_REAL
+          tempz3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            hp1 = hprime_xx(i,l)
+            iglob = ibool(l,j,k,ispec)
+            tempx1l = tempx1l + displ_inner_core(1,iglob)*hp1
+            tempy1l = tempy1l + displ_inner_core(2,iglob)*hp1
+            tempz1l = tempz1l + displ_inner_core(3,iglob)*hp1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+            hp2 = hprime_yy(j,l)
+            iglob = ibool(i,l,k,ispec)
+            tempx2l = tempx2l + displ_inner_core(1,iglob)*hp2
+            tempy2l = tempy2l + displ_inner_core(2,iglob)*hp2
+            tempz2l = tempz2l + displ_inner_core(3,iglob)*hp2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+            hp3 = hprime_zz(k,l)
+            iglob = ibool(i,j,l,ispec)
+            tempx3l = tempx3l + displ_inner_core(1,iglob)*hp3
+            tempy3l = tempy3l + displ_inner_core(2,iglob)*hp3
+            tempz3l = tempz3l + displ_inner_core(3,iglob)*hp3
+          enddo
+
+          if( ATTENUATION_VAL .and. COMPUTE_AND_STORE_STRAIN ) then
+             ! temporary variables used for fixing attenuation in a consistent way
+
+             tempx1l_att = tempx1l
+             tempx2l_att = tempx2l
+             tempx3l_att = tempx3l
+
+             tempy1l_att = tempy1l
+             tempy2l_att = tempy2l
+             tempy3l_att = tempy3l
+
+             tempz1l_att = tempz1l
+             tempz2l_att = tempz2l
+             tempz3l_att = tempz3l
+
+             if(ATTENUATION_NEW_VAL) then
+                ! uses the new attenuation routines:
+                ! use a first-order Taylor expansion of the displacement for local storage of stresses
+                ! at the current time step, to fix attenuation in a consistent way
+                do l=1,NGLLX
+                   hp1 = hprime_xx(i,l)
+                   iglob = ibool(l,j,k,ispec)
+                   tempx1l_att = tempx1l_att + deltat*veloc_inner_core(1,iglob)*hp1
+                   tempy1l_att = tempy1l_att + deltat*veloc_inner_core(2,iglob)*hp1
+                   tempz1l_att = tempz1l_att + deltat*veloc_inner_core(3,iglob)*hp1
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+                   hp2 = hprime_yy(j,l)
+                   iglob = ibool(i,l,k,ispec)
+                   tempx2l_att = tempx2l_att + deltat*veloc_inner_core(1,iglob)*hp2
+                   tempy2l_att = tempy2l_att + deltat*veloc_inner_core(2,iglob)*hp2
+                   tempz2l_att = tempz2l_att + deltat*veloc_inner_core(3,iglob)*hp2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+                   hp3 = hprime_zz(k,l)
+                   iglob = ibool(i,j,l,ispec)
+                   tempx3l_att = tempx3l_att + deltat*veloc_inner_core(1,iglob)*hp3
+                   tempy3l_att = tempy3l_att + deltat*veloc_inner_core(2,iglob)*hp3
+                   tempz3l_att = tempz3l_att + deltat*veloc_inner_core(3,iglob)*hp3
+                enddo
+             endif
+          endif
+
+!         get derivatives of ux, uy and uz with respect to x, y and z
+
+          xixl = xix(i,j,k,ispec)
+          xiyl = xiy(i,j,k,ispec)
+          xizl = xiz(i,j,k,ispec)
+          etaxl = etax(i,j,k,ispec)
+          etayl = etay(i,j,k,ispec)
+          etazl = etaz(i,j,k,ispec)
+          gammaxl = gammax(i,j,k,ispec)
+          gammayl = gammay(i,j,k,ispec)
+          gammazl = gammaz(i,j,k,ispec)
+
+! compute the jacobian
+          jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+                        - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+                        + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+          duxdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+          duxdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+          duxdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+          duydxl = xixl*tempy1l + etaxl*tempy2l + gammaxl*tempy3l
+          duydyl = xiyl*tempy1l + etayl*tempy2l + gammayl*tempy3l
+          duydzl = xizl*tempy1l + etazl*tempy2l + gammazl*tempy3l
+
+          duzdxl = xixl*tempz1l + etaxl*tempz2l + gammaxl*tempz3l
+          duzdyl = xiyl*tempz1l + etayl*tempz2l + gammayl*tempz3l
+          duzdzl = xizl*tempz1l + etazl*tempz2l + gammazl*tempz3l
+
+! precompute some sums to save CPU time
+          duxdxl_plus_duydyl = duxdxl + duydyl
+          duxdxl_plus_duzdzl = duxdxl + duzdzl
+          duydyl_plus_duzdzl = duydyl + duzdzl
+          duxdyl_plus_duydxl = duxdyl + duydxl
+          duzdxl_plus_duxdzl = duzdxl + duxdzl
+          duzdyl_plus_duydzl = duzdyl + duydzl
+
+          if( ATTENUATION_VAL .and. COMPUTE_AND_STORE_STRAIN ) then
+             ! temporary variables used for fixing attenuation in a consistent way
+             duxdxl_att = xixl*tempx1l_att + etaxl*tempx2l_att + gammaxl*tempx3l_att
+             duxdyl_att = xiyl*tempx1l_att + etayl*tempx2l_att + gammayl*tempx3l_att
+             duxdzl_att = xizl*tempx1l_att + etazl*tempx2l_att + gammazl*tempx3l_att
+
+             duydxl_att = xixl*tempy1l_att + etaxl*tempy2l_att + gammaxl*tempy3l_att
+             duydyl_att = xiyl*tempy1l_att + etayl*tempy2l_att + gammayl*tempy3l_att
+             duydzl_att = xizl*tempy1l_att + etazl*tempy2l_att + gammazl*tempy3l_att
+
+             duzdxl_att = xixl*tempz1l_att + etaxl*tempz2l_att + gammaxl*tempz3l_att
+             duzdyl_att = xiyl*tempz1l_att + etayl*tempz2l_att + gammayl*tempz3l_att
+             duzdzl_att = xizl*tempz1l_att + etazl*tempz2l_att + gammazl*tempz3l_att
+
+             ! precompute some sums to save CPU time
+             duxdyl_plus_duydxl_att = duxdyl_att + duydxl_att
+             duzdxl_plus_duxdzl_att = duzdxl_att + duxdzl_att
+             duzdyl_plus_duydzl_att = duzdyl_att + duydzl_att
+
+             ! compute deviatoric strain
+             if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
+                ispec_strain = 1
+             else
+                ispec_strain = ispec
+             endif
+             epsilon_trace_over_3(i,j,k,ispec_strain) = ONE_THIRD * (duxdxl_att + duydyl_att + duzdzl_att)
+             epsilondev_loc(1,i,j,k) = duxdxl_att - epsilon_trace_over_3(i,j,k,ispec_strain)
+             epsilondev_loc(2,i,j,k) = duydyl_att - epsilon_trace_over_3(i,j,k,ispec_strain)
+             epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl_att
+             epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl_att
+             epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl_att
+          else
+             ! compute deviatoric strain
+             if (COMPUTE_AND_STORE_STRAIN) then
+                if(NSPEC_INNER_CORE_STRAIN_ONLY == 1) then
+                   ispec_strain = 1
+                else
+                   ispec_strain = ispec
+                endif
+                epsilon_trace_over_3(i,j,k,ispec_strain) = ONE_THIRD * (duxdxl + duydyl + duzdzl)
+                epsilondev_loc(1,i,j,k) = duxdxl - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(2,i,j,k) = duydyl - epsilon_trace_over_3(i,j,k,ispec_strain)
+                epsilondev_loc(3,i,j,k) = 0.5 * duxdyl_plus_duydxl
+                epsilondev_loc(4,i,j,k) = 0.5 * duzdxl_plus_duxdzl
+                epsilondev_loc(5,i,j,k) = 0.5 * duzdyl_plus_duydzl
+             endif
+          endif
+
+          ! precompute terms for attenuation if needed
+          if( ATTENUATION_VAL ) then
+            if( USE_3D_ATTENUATION_ARRAYS ) then
+              minus_sum_beta =  one_minus_sum_beta(i,j,k,ispec) - 1.0_CUSTOM_REAL
+            else
+              minus_sum_beta =  one_minus_sum_beta(1,1,1,ispec) - 1.0_CUSTOM_REAL
+            endif
+          endif
+
+          if(ANISOTROPIC_INNER_CORE_VAL) then
+
+! elastic tensor for hexagonal symmetry in reduced notation:
+!
+!      c11 c12 c13  0   0        0
+!      c12 c11 c13  0   0        0
+!      c13 c13 c33  0   0        0
+!       0   0   0  c44  0        0
+!       0   0   0   0  c44       0
+!       0   0   0   0   0  (c11-c12)/2
+!
+!       in terms of the A, C, L, N and F of Love (1927):
+!
+!       c11 = A
+!       c12 = A-2N
+!       c13 = F
+!       c33 = C
+!       c44 = L
+
+            c11l = c11store(i,j,k,ispec)
+            c12l = c12store(i,j,k,ispec)
+            c13l = c13store(i,j,k,ispec)
+            c33l = c33store(i,j,k,ispec)
+            c44l = c44store(i,j,k,ispec)
+
+! use unrelaxed parameters if attenuation
+            if(ATTENUATION_VAL) then
+              mul = muvstore(i,j,k,ispec)
+              c11l = c11l + FOUR_THIRDS * minus_sum_beta * mul
+              c12l = c12l - TWO_THIRDS * minus_sum_beta * mul
+              c13l = c13l - TWO_THIRDS * minus_sum_beta * mul
+              c33l = c33l + FOUR_THIRDS * minus_sum_beta * mul
+              c44l = c44l + minus_sum_beta * mul
+            endif
+
+            sigma_xx = c11l*duxdxl + c12l*duydyl + c13l*duzdzl
+            sigma_yy = c12l*duxdxl + c11l*duydyl + c13l*duzdzl
+            sigma_zz = c13l*duxdxl + c13l*duydyl + c33l*duzdzl
+            sigma_xy = 0.5*(c11l-c12l)*duxdyl_plus_duydxl
+            sigma_xz = c44l*duzdxl_plus_duxdzl
+            sigma_yz = c44l*duzdyl_plus_duydzl
+          else
+
+! inner core layer with no anisotropy, use kappav and muv for instance
+            kappal = kappavstore(i,j,k,ispec)
+            mul = muvstore(i,j,k,ispec)
+
+            ! use unrelaxed parameters if attenuation
+            if( ATTENUATION_VAL ) then
+              if( USE_3D_ATTENUATION_ARRAYS ) then
+                mul = mul * one_minus_sum_beta(i,j,k,ispec)
+              else
+                mul = mul * one_minus_sum_beta(1,1,1,ispec)
+              endif
+            endif
+
+            lambdalplus2mul = kappal + FOUR_THIRDS * mul
+            lambdal = lambdalplus2mul - 2.*mul
+
+! compute stress sigma
+
+            sigma_xx = lambdalplus2mul*duxdxl + lambdal*duydyl_plus_duzdzl
+            sigma_yy = lambdalplus2mul*duydyl + lambdal*duxdxl_plus_duzdzl
+            sigma_zz = lambdalplus2mul*duzdzl + lambdal*duxdxl_plus_duydyl
+
+            sigma_xy = mul*duxdyl_plus_duydxl
+            sigma_xz = mul*duzdxl_plus_duxdzl
+            sigma_yz = mul*duzdyl_plus_duydzl
+
+          endif
+
+! subtract memory variables if attenuation
+          if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. ) ) then
+            do i_SLS = 1,N_SLS
+              R_xx_val = R_xx(i_SLS,i,j,k,ispec)
+              R_yy_val = R_yy(i_SLS,i,j,k,ispec)
+              sigma_xx = sigma_xx - R_xx_val
+              sigma_yy = sigma_yy - R_yy_val
+              sigma_zz = sigma_zz + R_xx_val + R_yy_val
+              sigma_xy = sigma_xy - R_xy(i_SLS,i,j,k,ispec)
+              sigma_xz = sigma_xz - R_xz(i_SLS,i,j,k,ispec)
+              sigma_yz = sigma_yz - R_yz(i_SLS,i,j,k,ispec)
+            enddo
+          endif
+
+! define symmetric components of sigma for gravity
+          sigma_yx = sigma_xy
+          sigma_zx = sigma_xz
+          sigma_zy = sigma_yz
+
+! compute non-symmetric terms for gravity
+          if(GRAVITY_VAL) then
+
+! use mesh coordinates to get theta and phi
+! x y and z contain r theta and phi
+
+            iglob = ibool(i,j,k,ispec)
+            radius = dble(xstore(iglob))
+            theta = dble(ystore(iglob))
+            phi = dble(zstore(iglob))
+
+! make sure radius is never zero even for points at center of cube
+! because we later divide by radius
+            if(radius < 100.d0 / R_EARTH) radius = 100.d0 / R_EARTH
+
+            cos_theta = dcos(theta)
+            sin_theta = dsin(theta)
+            cos_phi = dcos(phi)
+            sin_phi = dsin(phi)
+
+! get g, rho and dg/dr=dg
+! spherical components of the gravitational acceleration
+! for efficiency replace with lookup table every 100 m in radial direction
+! make sure we never use zero for point exactly at the center of the Earth
+            int_radius = max(1,nint(radius * R_EARTH_KM * 10.d0))
+            minus_g = minus_gravity_table(int_radius)
+            minus_dg = minus_deriv_gravity_table(int_radius)
+            rho = density_table(int_radius)
+
+! Cartesian components of the gravitational acceleration
+            gxl = minus_g*sin_theta*cos_phi
+            gyl = minus_g*sin_theta*sin_phi
+            gzl = minus_g*cos_theta
+
+! Cartesian components of gradient of gravitational acceleration
+! obtained from spherical components
+
+            minus_g_over_radius = minus_g / radius
+            minus_dg_plus_g_over_radius = minus_dg - minus_g_over_radius
+
+            cos_theta_sq = cos_theta**2
+            sin_theta_sq = sin_theta**2
+            cos_phi_sq = cos_phi**2
+            sin_phi_sq = sin_phi**2
+
+            Hxxl = minus_g_over_radius*(cos_phi_sq*cos_theta_sq + sin_phi_sq) + cos_phi_sq*minus_dg*sin_theta_sq
+            Hyyl = minus_g_over_radius*(cos_phi_sq + cos_theta_sq*sin_phi_sq) + minus_dg*sin_phi_sq*sin_theta_sq
+            Hzzl = cos_theta_sq*minus_dg + minus_g_over_radius*sin_theta_sq
+            Hxyl = cos_phi*minus_dg_plus_g_over_radius*sin_phi*sin_theta_sq
+            Hxzl = cos_phi*cos_theta*minus_dg_plus_g_over_radius*sin_theta
+            Hyzl = cos_theta*minus_dg_plus_g_over_radius*sin_phi*sin_theta
+
+            iglob = ibool(i,j,k,ispec)
+
+! distinguish between single and double precision for reals
+            if(CUSTOM_REAL == SIZE_REAL) then
+
+! get displacement and multiply by density to compute G tensor
+              sx_l = rho * dble(displ_inner_core(1,iglob))
+              sy_l = rho * dble(displ_inner_core(2,iglob))
+              sz_l = rho * dble(displ_inner_core(3,iglob))
+
+! compute G tensor from s . g and add to sigma (not symmetric)
+              sigma_xx = sigma_xx + sngl(sy_l*gyl + sz_l*gzl)
+              sigma_yy = sigma_yy + sngl(sx_l*gxl + sz_l*gzl)
+              sigma_zz = sigma_zz + sngl(sx_l*gxl + sy_l*gyl)
+
+              sigma_xy = sigma_xy - sngl(sx_l * gyl)
+              sigma_yx = sigma_yx - sngl(sy_l * gxl)
+
+              sigma_xz = sigma_xz - sngl(sx_l * gzl)
+              sigma_zx = sigma_zx - sngl(sz_l * gxl)
+
+              sigma_yz = sigma_yz - sngl(sy_l * gzl)
+              sigma_zy = sigma_zy - sngl(sz_l * gyl)
+
+! precompute vector
+              factor = dble(jacobianl) * wgll_cube(i,j,k)
+              rho_s_H(1,i,j,k) = sngl(factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl))
+              rho_s_H(2,i,j,k) = sngl(factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl))
+              rho_s_H(3,i,j,k) = sngl(factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl))
+
+            else
+
+! get displacement and multiply by density to compute G tensor
+              sx_l = rho * displ_inner_core(1,iglob)
+              sy_l = rho * displ_inner_core(2,iglob)
+              sz_l = rho * displ_inner_core(3,iglob)
+
+! compute G tensor from s . g and add to sigma (not symmetric)
+              sigma_xx = sigma_xx + sy_l*gyl + sz_l*gzl
+              sigma_yy = sigma_yy + sx_l*gxl + sz_l*gzl
+              sigma_zz = sigma_zz + sx_l*gxl + sy_l*gyl
+
+              sigma_xy = sigma_xy - sx_l * gyl
+              sigma_yx = sigma_yx - sy_l * gxl
+
+              sigma_xz = sigma_xz - sx_l * gzl
+              sigma_zx = sigma_zx - sz_l * gxl
+
+              sigma_yz = sigma_yz - sy_l * gzl
+              sigma_zy = sigma_zy - sz_l * gyl
+
+! precompute vector
+              factor = jacobianl * wgll_cube(i,j,k)
+              rho_s_H(1,i,j,k) = factor * (sx_l * Hxxl + sy_l * Hxyl + sz_l * Hxzl)
+              rho_s_H(2,i,j,k) = factor * (sx_l * Hxyl + sy_l * Hyyl + sz_l * Hyzl)
+              rho_s_H(3,i,j,k) = factor * (sx_l * Hxzl + sy_l * Hyzl + sz_l * Hzzl)
+
+            endif
+
+          endif  ! end of section with gravity terms
+
+! form dot product with test vector, non-symmetric form
+
+          tempx1(i,j,k) = jacobianl * (sigma_xx*xixl + sigma_yx*xiyl + sigma_zx*xizl)
+          tempy1(i,j,k) = jacobianl * (sigma_xy*xixl + sigma_yy*xiyl + sigma_zy*xizl)
+          tempz1(i,j,k) = jacobianl * (sigma_xz*xixl + sigma_yz*xiyl + sigma_zz*xizl)
+
+          tempx2(i,j,k) = jacobianl * (sigma_xx*etaxl + sigma_yx*etayl + sigma_zx*etazl)
+          tempy2(i,j,k) = jacobianl * (sigma_xy*etaxl + sigma_yy*etayl + sigma_zy*etazl)
+          tempz2(i,j,k) = jacobianl * (sigma_xz*etaxl + sigma_yz*etayl + sigma_zz*etazl)
+
+          tempx3(i,j,k) = jacobianl * (sigma_xx*gammaxl + sigma_yx*gammayl + sigma_zx*gammazl)
+          tempy3(i,j,k) = jacobianl * (sigma_xy*gammaxl + sigma_yy*gammayl + sigma_zy*gammazl)
+          tempz3(i,j,k) = jacobianl * (sigma_xz*gammaxl + sigma_yz*gammayl + sigma_zz*gammazl)
+
+        enddo
+      enddo
+    enddo
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempy1l = 0._CUSTOM_REAL
+          tempz1l = 0._CUSTOM_REAL
+
+          tempx2l = 0._CUSTOM_REAL
+          tempy2l = 0._CUSTOM_REAL
+          tempz2l = 0._CUSTOM_REAL
+
+          tempx3l = 0._CUSTOM_REAL
+          tempy3l = 0._CUSTOM_REAL
+          tempz3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            fac1 = hprimewgll_xx(l,i)
+            tempx1l = tempx1l + tempx1(l,j,k)*fac1
+            tempy1l = tempy1l + tempy1(l,j,k)*fac1
+            tempz1l = tempz1l + tempz1(l,j,k)*fac1
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLY
+            fac2 = hprimewgll_yy(l,j)
+            tempx2l = tempx2l + tempx2(i,l,k)*fac2
+            tempy2l = tempy2l + tempy2(i,l,k)*fac2
+            tempz2l = tempz2l + tempz2(i,l,k)*fac2
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+
+!!! can merge these loops because NGLLX = NGLLY = NGLLZ          do l=1,NGLLZ
+            fac3 = hprimewgll_zz(l,k)
+            tempx3l = tempx3l + tempx3(i,j,l)*fac3
+            tempy3l = tempy3l + tempy3(i,j,l)*fac3
+            tempz3l = tempz3l + tempz3(i,j,l)*fac3
+          enddo
+
+          fac1 = wgllwgll_yz(j,k)
+          fac2 = wgllwgll_xz(i,k)
+          fac3 = wgllwgll_xy(i,j)
+
+          sum_terms(1,i,j,k) = - (fac1*tempx1l + fac2*tempx2l + fac3*tempx3l)
+          sum_terms(2,i,j,k) = - (fac1*tempy1l + fac2*tempy2l + fac3*tempy3l)
+          sum_terms(3,i,j,k) = - (fac1*tempz1l + fac2*tempz2l + fac3*tempz3l)
+
+          if(GRAVITY_VAL) sum_terms(:,i,j,k) = sum_terms(:,i,j,k) + rho_s_H(:,i,j,k)
+
+        enddo
+      enddo
+    enddo
+
+! sum contributions from each element to the global mesh and add gravity terms
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+          iglob = ibool(i,j,k,ispec)
+          accel_inner_core(:,iglob) = accel_inner_core(:,iglob) + sum_terms(:,i,j,k)
+        enddo
+      enddo
+    enddo
+
+! use Runge-Kutta scheme to march memory variables in time
+! convention for attenuation
+! term in xx = 1
+! term in yy = 2
+! term in xy = 3
+! term in xz = 4
+! term in yz = 5
+! term in zz not computed since zero trace
+! This is because we only implement Q_\mu attenuation and not Q_\kappa.
+! Note that this does *NOT* imply that there is no attenuation for P waves
+! because for Q_\kappa = infinity one gets (see for instance Dahlen and Tromp (1998)
+! equation (9.59) page 350): Q_\alpha = Q_\mu * 3 * (V_p/V_s)^2 / 4
+! therefore Q_\alpha remains finite, i.e., P waves are attenuated as well; for instance for V_p / V_s = sqrt(3)
+! we get Q_\alpha = (9 / 4) * Q_\mu = 2.25 * Q_\mu
+
+    if(ATTENUATION_VAL .and. ( USE_ATTENUATION_MIMIC .eqv. .false. )) then
+
+      do i_SLS = 1,N_SLS
+
+        ! reformatted R_memory to handle large factor_common and reduced [alpha,beta,gamma]val
+        if (USE_3D_ATTENUATION_ARRAYS) then
+          factor_common_use(:,:,:) = factor_common(i_SLS,:,:,:,ispec) * muvstore(:,:,:,ispec)
+        else
+          factor_common_use(:,:,:) = factor_common(i_SLS,1,1,1,ispec) * muvstore(:,:,:,ispec)
+        endif
+
+!        do i_memory = 1,5
+!          R_memory(i_memory,i_SLS,:,:,:,ispec) = &
+!                  alphaval(i_SLS) * &
+!                  R_memory(i_memory,i_SLS,:,:,:,ispec) + muvstore(:,:,:,ispec) * &
+!                  factor_common_use * &
+!                  (betaval(i_SLS) * &
+!                  epsilondev(i_memory,:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(i_memory,:,:,:))
+!        enddo
+
+        R_xx(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xx(i_SLS,:,:,:,ispec) + factor_common_use(:,:,:) * &
+              (betaval(i_SLS) * epsilondev_xx(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(1,:,:,:))
+
+        R_yy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yy(i_SLS,:,:,:,ispec) + factor_common_use(:,:,:) * &
+              (betaval(i_SLS) * epsilondev_yy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(2,:,:,:))
+
+        R_xy(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xy(i_SLS,:,:,:,ispec) + factor_common_use(:,:,:) * &
+              (betaval(i_SLS) * epsilondev_xy(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(3,:,:,:))
+
+        R_xz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_xz(i_SLS,:,:,:,ispec) + factor_common_use(:,:,:) * &
+              (betaval(i_SLS) * epsilondev_xz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(4,:,:,:))
+
+        R_yz(i_SLS,:,:,:,ispec) = alphaval(i_SLS) * R_yz(i_SLS,:,:,:,ispec) + factor_common_use(:,:,:) * &
+              (betaval(i_SLS) * epsilondev_yz(:,:,:,ispec) + gammaval(i_SLS) * epsilondev_loc(5,:,:,:))
+
+      enddo
+
+    endif
+
+    if (COMPUTE_AND_STORE_STRAIN) then
+! save deviatoric strain for Runge-Kutta scheme
+      !epsilondev(:,:,:,:,ispec) = epsilondev_loc(:,:,:,:)
+      do k=1,NGLLZ
+        do j=1,NGLLY
+          do i=1,NGLLX
+            epsilondev_xx(i,j,k,ispec) = epsilondev_loc(1,i,j,k)
+            epsilondev_yy(i,j,k,ispec) = epsilondev_loc(2,i,j,k)
+            epsilondev_xy(i,j,k,ispec) = epsilondev_loc(3,i,j,k)
+            epsilondev_xz(i,j,k,ispec) = epsilondev_loc(4,i,j,k)
+            epsilondev_yz(i,j,k,ispec) = epsilondev_loc(5,i,j,k)
+          enddo
+        enddo
+      enddo
+
+    endif
+
+  endif   ! end test to exclude fictitious elements in central cube
+
+  enddo ! spectral element loop
+
+  end subroutine compute_forces_inner_core
+
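Each of the unrolled R_xx ... R_yz statements above applies the same standard-linear-solid
recursion, R <- alphaval*R + (mu*factor_common)*(betaval*eps_old + gammaval*eps_new), per SLS
and per deviatoric strain component. A minimal sketch of that recursion for a single strain
component; the names below are hypothetical, not the solver's:

  subroutine update_memory_variable(n_sls,alphaval,betaval,gammaval,mu_fc,eps_old,eps_new,R)
    implicit none
    integer, intent(in) :: n_sls
    real, intent(in) :: alphaval(n_sls),betaval(n_sls),gammaval(n_sls)
    real, intent(in) :: mu_fc       ! mu times factor_common at this GLL point
    real, intent(in) :: eps_old     ! deviatoric strain component from the previous step
    real, intent(in) :: eps_new     ! deviatoric strain component at the current step
    real, intent(inout) :: R(n_sls) ! memory variables, one per standard linear solid
    integer :: i_sls
    ! march each memory variable forward in time
    do i_sls = 1,n_sls
      R(i_sls) = alphaval(i_sls)*R(i_sls) &
               + mu_fc*(betaval(i_sls)*eps_old + gammaval(i_sls)*eps_new)
    enddo
  end subroutine update_memory_variable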

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/compute_forces_outer_core.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,339 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine compute_forces_outer_core(time,deltat,two_omega_earth, &
+                                      NSPEC,NGLOB, &
+                                      A_array_rotation,B_array_rotation, &
+                                      displfluid,accelfluid, &
+                                      div_displfluid,phase_is_inner)
+
+  use constants_solver
+
+  use specfem_par,only: &
+    hprime_xx,hprime_yy,hprime_zz,hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+    wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube, &
+    minus_rho_g_over_kappa_fluid,d_ln_density_dr_table, &
+    MOVIE_VOLUME
+
+  use specfem_par_outercore,only: &
+    xstore => xstore_outer_core,ystore => ystore_outer_core,zstore => zstore_outer_core, &
+    xix => xix_outer_core,xiy => xiy_outer_core,xiz => xiz_outer_core, &
+    etax => etax_outer_core,etay => etay_outer_core,etaz => etaz_outer_core, &
+    gammax => gammax_outer_core,gammay => gammay_outer_core,gammaz => gammaz_outer_core, &
+    ibool => ibool_outer_core, &
+    phase_ispec_inner => phase_ispec_inner_outer_core, &
+    nspec_outer => nspec_outer_outer_core, &
+    nspec_inner => nspec_inner_outer_core
+
+  implicit none
+
+  integer :: NSPEC,NGLOB
+
+  ! for the Euler scheme for rotation
+  real(kind=CUSTOM_REAL) time,deltat,two_omega_earth
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC) :: &
+    A_array_rotation,B_array_rotation
+
+  ! displacement and acceleration
+  real(kind=CUSTOM_REAL), dimension(NGLOB) :: displfluid,accelfluid
+
+  ! divergence of displacement
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: div_displfluid
+
+  ! inner/outer element run flag
+  logical :: phase_is_inner
+
+  ! local parameters
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: tempx1,tempx2,tempx3
+  ! for gravity
+  integer int_radius
+  double precision radius,theta,phi,gxl,gyl,gzl
+  double precision cos_theta,sin_theta,cos_phi,sin_phi
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: gravity_term
+  ! for the Euler scheme for rotation
+  real(kind=CUSTOM_REAL) two_omega_deltat,cos_two_omega_t,sin_two_omega_t,A_rotation,B_rotation, &
+       ux_rotation,uy_rotation,dpotentialdx_with_rot,dpotentialdy_with_rot
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ) :: source_euler_A,source_euler_B
+
+  integer ispec,iglob
+  integer i,j,k,l
+
+  real(kind=CUSTOM_REAL) xixl,xiyl,xizl,etaxl,etayl,etazl,gammaxl,gammayl,gammazl,jacobianl
+  real(kind=CUSTOM_REAL) dpotentialdxl,dpotentialdyl,dpotentialdzl
+  real(kind=CUSTOM_REAL) tempx1l,tempx2l,tempx3l,sum_terms
+
+  double precision grad_x_ln_rho,grad_y_ln_rho,grad_z_ln_rho
+
+!  integer :: computed_elements
+  integer :: num_elements,ispec_p
+  integer :: iphase
+
+! ****************************************************
+!   big loop over all spectral elements in the fluid
+! ****************************************************
+
+  if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. ( .not. phase_is_inner )) div_displfluid(:,:,:,:) = 0._CUSTOM_REAL
+
+!  computed_elements = 0
+  if( .not. phase_is_inner ) then
+    iphase = 1
+    num_elements = nspec_outer
+  else
+    iphase = 2
+    num_elements = nspec_inner
+  endif
+
+  do ispec_p = 1,num_elements
+
+    ispec = phase_ispec_inner(ispec_p,iphase)
+
+    ! only compute elements which belong to the current phase (inner or outer elements)
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempx2l = 0._CUSTOM_REAL
+          tempx3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+            tempx1l = tempx1l + displfluid(ibool(l,j,k,ispec)) * hprime_xx(i,l)
+            tempx2l = tempx2l + displfluid(ibool(i,l,k,ispec)) * hprime_yy(j,l)
+            tempx3l = tempx3l + displfluid(ibool(i,j,l,ispec)) * hprime_zz(k,l)
+          enddo
+
+          ! get derivatives of velocity potential with respect to x, y and z
+          xixl = xix(i,j,k,ispec)
+          xiyl = xiy(i,j,k,ispec)
+          xizl = xiz(i,j,k,ispec)
+          etaxl = etax(i,j,k,ispec)
+          etayl = etay(i,j,k,ispec)
+          etazl = etaz(i,j,k,ispec)
+          gammaxl = gammax(i,j,k,ispec)
+          gammayl = gammay(i,j,k,ispec)
+          gammazl = gammaz(i,j,k,ispec)
+
+          ! compute the jacobian
+          jacobianl = 1._CUSTOM_REAL / (xixl*(etayl*gammazl-etazl*gammayl) &
+                        - xiyl*(etaxl*gammazl-etazl*gammaxl) &
+                        + xizl*(etaxl*gammayl-etayl*gammaxl))
+
+          dpotentialdxl = xixl*tempx1l + etaxl*tempx2l + gammaxl*tempx3l
+          dpotentialdyl = xiyl*tempx1l + etayl*tempx2l + gammayl*tempx3l
+          dpotentialdzl = xizl*tempx1l + etazl*tempx2l + gammazl*tempx3l
+
+          ! compute contribution of rotation and add to gradient of potential
+          ! this term has no Z component
+          if(ROTATION_VAL) then
+
+            ! store the source for the Euler scheme for A_rotation and B_rotation
+            two_omega_deltat = deltat * two_omega_earth
+
+            cos_two_omega_t = cos(two_omega_earth*time)
+            sin_two_omega_t = sin(two_omega_earth*time)
+
+            ! time step deltat of Euler scheme is included in the source
+            source_euler_A(i,j,k) = two_omega_deltat &
+                  * (cos_two_omega_t * dpotentialdyl + sin_two_omega_t * dpotentialdxl)
+            source_euler_B(i,j,k) = two_omega_deltat &
+                  * (sin_two_omega_t * dpotentialdyl - cos_two_omega_t * dpotentialdxl)
+
+            A_rotation = A_array_rotation(i,j,k,ispec)
+            B_rotation = B_array_rotation(i,j,k,ispec)
+
+            ux_rotation =   A_rotation*cos_two_omega_t + B_rotation*sin_two_omega_t
+            uy_rotation = - A_rotation*sin_two_omega_t + B_rotation*cos_two_omega_t
+
+            dpotentialdx_with_rot = dpotentialdxl + ux_rotation
+            dpotentialdy_with_rot = dpotentialdyl + uy_rotation
+
+          else
+
+            dpotentialdx_with_rot = dpotentialdxl
+            dpotentialdy_with_rot = dpotentialdyl
+
+          endif  ! end of section with rotation
+
+          ! add (chi/rho)grad(rho) term in no gravity case
+          if(.not. GRAVITY_VAL) then
+            ! With regard to the non-gravitating case: we cannot set N^2 = 0 *and* let g = 0.
+            ! We can *either* assume N^2 = 0 but keep gravity g, *or* we can assume that gravity
+            ! is negligible to begin with, as in our GJI 2002a, in which case N does not arise.
+            ! We get:
+            !
+            ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
+            !
+            ! Then the displacement is
+            !
+            ! \bu = \bdel\chi+\chi\bdel\ln\rho = \rho^{-1}\bdel(\rho\chi)
+            !
+            ! and the pressure is
+            !
+            ! p = -\rho\ddot{\chi}
+            !
+            ! Thus in our 2002b GJI paper eqn (21) is wrong, and equation (41)
+            ! in our AGU monograph is incorrect; these equations should be replaced by
+            !
+            ! \ddot\chi = \rho^{-1}\kappa\bdel\cdot(\bdel\chi+\chi\bdel\ln\rho)
+            !
+            ! Note that the fluid potential we use in GJI 2002a differs from the one used here:
+            !
+            ! \chi_GJI2002a = \rho\partial\t\chi
+            !
+            ! such that
+            !
+            ! \bv = \partial_t\bu=\rho^{-1}\bdel\chi_GJI2002a  (GJI 2002a eqn 20)
+            !
+            ! p = - \partial_t\chi_GJI2002a (GJI 2002a eqn 19)
+
+            ! use mesh coordinates to get theta and phi
+            ! x y z contain r theta phi
+            iglob = ibool(i,j,k,ispec)
+
+            radius = dble(xstore(iglob))
+            theta = dble(ystore(iglob))
+            phi = dble(zstore(iglob))
+
+            cos_theta = dcos(theta)
+            sin_theta = dsin(theta)
+            cos_phi = dcos(phi)
+            sin_phi = dsin(phi)
+
+            int_radius = nint(radius * R_EARTH_KM * 10.d0)
+
+            ! grad(rho)/rho in Cartesian components
+            grad_x_ln_rho = sin_theta * cos_phi * d_ln_density_dr_table(int_radius)
+            grad_y_ln_rho = sin_theta * sin_phi * d_ln_density_dr_table(int_radius)
+            grad_z_ln_rho = cos_theta * d_ln_density_dr_table(int_radius)
+
+            ! adding (chi/rho)grad(rho)
+            dpotentialdx_with_rot = dpotentialdx_with_rot + displfluid(iglob) * grad_x_ln_rho
+            dpotentialdy_with_rot = dpotentialdy_with_rot + displfluid(iglob) * grad_y_ln_rho
+            dpotentialdzl = dpotentialdzl + displfluid(iglob) * grad_z_ln_rho
+
+
+         else  ! if gravity is turned on
+
+            ! compute divergence of displacement
+            ! precompute and store gravity term
+
+            ! use mesh coordinates to get theta and phi
+            ! x y z contain r theta phi
+            iglob = ibool(i,j,k,ispec)
+
+            radius = dble(xstore(iglob))
+            theta = dble(ystore(iglob))
+            phi = dble(zstore(iglob))
+
+            cos_theta = dcos(theta)
+            sin_theta = dsin(theta)
+            cos_phi = dcos(phi)
+            sin_phi = dsin(phi)
+
+            ! get g, rho and dg/dr=dg
+            ! spherical components of the gravitational acceleration
+            ! for efficiency replace with lookup table every 100 m in radial direction
+            int_radius = nint(radius * R_EARTH_KM * 10.d0)
+
+            ! Cartesian components of the gravitational acceleration
+            ! integrate and multiply by rho / Kappa
+            gxl = sin_theta*cos_phi
+            gyl = sin_theta*sin_phi
+            gzl = cos_theta
+
+            ! distinguish between single and double precision for reals
+            if(CUSTOM_REAL == SIZE_REAL) then
+              gravity_term(i,j,k) = &
+                sngl(minus_rho_g_over_kappa_fluid(int_radius) * &
+                dble(jacobianl) * wgll_cube(i,j,k) * &
+               (dble(dpotentialdx_with_rot) * gxl + &
+                dble(dpotentialdy_with_rot) * gyl + dble(dpotentialdzl) * gzl))
+            else
+              gravity_term(i,j,k) = minus_rho_g_over_kappa_fluid(int_radius) * &
+                 jacobianl * wgll_cube(i,j,k) * (dpotentialdx_with_rot * gxl + &
+                 dpotentialdy_with_rot * gyl + dpotentialdzl * gzl)
+            endif
+
+            ! divergence of displacement field with gravity on
+            ! note: these calculations are only used when SIMULATION_TYPE == 1 .and. SAVE_FORWARD
+            !          and MOVIE_VOLUME_TYPE == 4 has been set with MOVIE_VOLUME set to .true.;
+            !         for SIMULATION_TYPE == 3 this array gets overwritten by compute_kernels_outer_core()
+            if (NSPEC_OUTER_CORE_ADJOINT /= 1 .and. MOVIE_VOLUME ) then
+              div_displfluid(i,j,k,ispec) =  &
+                 minus_rho_g_over_kappa_fluid(int_radius) * (dpotentialdx_with_rot * gxl + &
+                 dpotentialdy_with_rot * gyl + dpotentialdzl * gzl)
+            endif
+
+          endif
+
+          tempx1(i,j,k) = jacobianl*(xixl*dpotentialdx_with_rot + xiyl*dpotentialdy_with_rot + xizl*dpotentialdzl)
+          tempx2(i,j,k) = jacobianl*(etaxl*dpotentialdx_with_rot + etayl*dpotentialdy_with_rot + etazl*dpotentialdzl)
+          tempx3(i,j,k) = jacobianl*(gammaxl*dpotentialdx_with_rot + gammayl*dpotentialdy_with_rot + gammazl*dpotentialdzl)
+
+        enddo
+      enddo
+    enddo
+
+    do k=1,NGLLZ
+      do j=1,NGLLY
+        do i=1,NGLLX
+
+          tempx1l = 0._CUSTOM_REAL
+          tempx2l = 0._CUSTOM_REAL
+          tempx3l = 0._CUSTOM_REAL
+
+          do l=1,NGLLX
+            !!! can merge these loops because NGLLX = NGLLY = NGLLZ          enddo
+            tempx1l = tempx1l + tempx1(l,j,k) * hprimewgll_xx(l,i)
+            tempx2l = tempx2l + tempx2(i,l,k) * hprimewgll_yy(l,j)
+            tempx3l = tempx3l + tempx3(i,j,l) * hprimewgll_zz(l,k)
+          enddo
+
+          ! sum contributions from each element to the global mesh and add gravity term
+          sum_terms = - (wgllwgll_yz(j,k)*tempx1l + wgllwgll_xz(i,k)*tempx2l + wgllwgll_xy(i,j)*tempx3l)
+          if(GRAVITY_VAL) sum_terms = sum_terms + gravity_term(i,j,k)
+
+          accelfluid(ibool(i,j,k,ispec)) = accelfluid(ibool(i,j,k,ispec)) + sum_terms
+
+        enddo
+      enddo
+    enddo
+
+    ! update rotation term with Euler scheme
+    if(ROTATION_VAL) then
+      ! use the source saved above
+      A_array_rotation(:,:,:,ispec) = A_array_rotation(:,:,:,ispec) + source_euler_A(:,:,:)
+      B_array_rotation(:,:,:,ispec) = B_array_rotation(:,:,:,ispec) + source_euler_B(:,:,:)
+    endif
+
+  enddo   ! spectral element loop
+
+  end subroutine compute_forces_outer_core
+
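The ROTATION_VAL branch above advances the rotation (Coriolis) arrays with an explicit Euler
step: at each GLL point a source term proportional to 2*Omega*deltat and to the horizontal
potential gradients is stored, and A_array_rotation/B_array_rotation are then incremented once
per element. A minimal point-wise sketch of that update; the names below are hypothetical,
not the solver's:

  subroutine euler_rotation_update(two_omega_earth,deltat,time,dpdx,dpdy,A_rot,B_rot)
    implicit none
    real, intent(in) :: two_omega_earth,deltat,time
    real, intent(in) :: dpdx,dpdy          ! horizontal gradients of the fluid potential
    real, intent(inout) :: A_rot,B_rot     ! rotation memory variables at this GLL point
    real :: c,s,two_omega_deltat
    two_omega_deltat = deltat*two_omega_earth
    c = cos(two_omega_earth*time)
    s = sin(two_omega_earth*time)
    ! forward Euler step: the time step deltat is folded into the source terms
    A_rot = A_rot + two_omega_deltat*(c*dpdy + s*dpdx)
    B_rot = B_rot + two_omega_deltat*(s*dpdy - c*dpdx)
  end subroutine euler_rotation_update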

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/finalize_simulation.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/finalize_simulation.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/finalize_simulation.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,274 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine finalize_simulation()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+  use specfem_par_movie
+  implicit none
+
+  ! synchronize all processes, waits until all processes have written their seismograms
+  call sync_all()
+
+  ! closes Stacey absorbing boundary snapshots
+  if( ABSORBING_CONDITIONS ) then
+    ! crust mantle
+    if (nspec2D_xmin_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+      .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+      call close_file_abs(0)
+    endif
+
+    if (nspec2D_xmax_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+      .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+      call close_file_abs(1)
+    endif
+
+    if (nspec2D_ymin_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+      .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+      call close_file_abs(2)
+    endif
+
+    if (nspec2D_ymax_crust_mantle > 0 .and. (SIMULATION_TYPE == 3 &
+      .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+      call close_file_abs(3)
+    endif
+
+    ! outer core
+    if (nspec2D_xmin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+      .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+      call close_file_abs(4)
+    endif
+
+    if (nspec2D_xmax_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+      .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+      call close_file_abs(5)
+    endif
+
+    if (nspec2D_ymin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+      .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+      call close_file_abs(6)
+    endif
+
+    if (nspec2D_ymax_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+      .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+      call close_file_abs(7)
+    endif
+
+    if (nspec2D_zmin_outer_core > 0 .and. (SIMULATION_TYPE == 3 &
+      .or. (SIMULATION_TYPE == 1 .and. SAVE_FORWARD))) then
+      call close_file_abs(8)
+    endif
+
+    ! frees memory
+    deallocate(absorb_xmin_crust_mantle, &
+              absorb_xmax_crust_mantle, &
+              absorb_ymin_crust_mantle, &
+              absorb_ymax_crust_mantle, &
+              absorb_xmin_outer_core, &
+              absorb_xmax_outer_core, &
+              absorb_ymin_outer_core, &
+              absorb_ymax_outer_core, &
+              absorb_zmin_outer_core)
+  endif
+
+  ! save/read the surface movie using the same C routine as we do for absorbing boundaries (file ID is 9)
+  if (NOISE_TOMOGRAPHY/=0) then
+    call close_file_abs(9)
+  endif
+
+  ! save files to local disk or tape system if restart file
+  call save_forward_arrays()
+
+  ! dump kernel arrays
+  if (SIMULATION_TYPE == 3) then
+    ! crust mantle
+    call save_kernels_crust_mantle()
+
+    ! noise strength kernel
+    if (NOISE_TOMOGRAPHY == 3) then
+       call save_kernels_strength_noise()
+    endif
+
+    ! outer core
+    call save_kernels_outer_core()
+
+    ! inner core
+    call save_kernels_inner_core()
+
+    ! boundary kernel
+    if (SAVE_BOUNDARY_MESH) then
+      call save_kernels_boundary_kl()
+    endif
+
+    ! approximate hessian
+    if( APPROXIMATE_HESS_KL ) then
+      call save_kernels_hessian()
+    endif
+  endif
+
+  ! save source derivatives for adjoint simulations
+  if (SIMULATION_TYPE == 2 .and. nrec_local > 0) then
+    call save_kernels_source_derivatives()
+  endif
+
+  ! frees dynamically allocated memory
+
+  ! mass matrices
+  deallocate(rmassx_crust_mantle)
+  deallocate(rmassy_crust_mantle)
+  deallocate(rmassz_crust_mantle)
+
+  deallocate(rmass_outer_core)
+  deallocate(rmass_inner_core)
+
+
+  ! mpi buffers
+  deallocate(buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle, &
+            request_send_vector_cm,request_recv_vector_cm)
+  deallocate(buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core, &
+            request_send_scalar_oc,request_recv_scalar_oc)
+  deallocate(buffer_send_vector_inner_core,buffer_recv_vector_inner_core, &
+            request_send_vector_ic,request_recv_vector_ic)
+
+  if( SIMULATION_TYPE == 3 ) then
+    deallocate(b_buffer_send_vector_cm,b_buffer_recv_vector_cm, &
+              b_request_send_vector_cm,b_request_recv_vector_cm)
+    deallocate(b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core, &
+              b_request_send_scalar_oc,b_request_recv_scalar_oc)
+    deallocate(b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core, &
+              b_request_send_vector_ic,b_request_recv_vector_ic)
+  endif
+
+  deallocate(my_neighbours_crust_mantle,nibool_interfaces_crust_mantle)
+  deallocate(ibool_interfaces_crust_mantle)
+  deallocate(my_neighbours_outer_core,nibool_interfaces_outer_core)
+  deallocate(ibool_interfaces_outer_core)
+  deallocate(my_neighbours_inner_core,nibool_interfaces_inner_core)
+  deallocate(ibool_interfaces_inner_core)
+
+  ! inner/outer elements
+  deallocate(phase_ispec_inner_crust_mantle)
+  deallocate(phase_ispec_inner_outer_core)
+  deallocate(phase_ispec_inner_inner_core)
+
+  ! coloring
+  deallocate(num_elem_colors_crust_mantle)
+  deallocate(num_elem_colors_outer_core)
+  deallocate(num_elem_colors_inner_core)
+
+  ! sources
+  deallocate(islice_selected_source, &
+             ispec_selected_source, &
+             Mxx,Myy,Mzz,Mxy,Mxz,Myz)
+  deallocate(xi_source,eta_source,gamma_source)
+  deallocate(tshift_cmt,hdur,hdur_gaussian)
+  deallocate(nu_source)
+
+  if (SIMULATION_TYPE == 1 .or. SIMULATION_TYPE == 3) deallocate(sourcearrays)
+  if (SIMULATION_TYPE == 2 .or. SIMULATION_TYPE == 3) then
+    deallocate(iadj_vec)
+    if(nadj_rec_local > 0) then
+      deallocate(adj_sourcearrays)
+      deallocate(iadjsrc,iadjsrc_len)
+    endif
+  endif
+
+  ! receivers
+  deallocate(islice_selected_rec,ispec_selected_rec, &
+            xi_receiver,eta_receiver,gamma_receiver)
+  deallocate(station_name,network_name, &
+            stlat,stlon,stele,stbur)
+  deallocate(nu,number_receiver_global)
+
+  if( nrec_local > 0 ) then
+    deallocate(hxir_store, &
+              hetar_store, &
+              hgammar_store)
+    if( SIMULATION_TYPE == 2 ) then
+      deallocate(moment_der,stshift_der)
+    endif
+  endif
+  deallocate(seismograms)
+
+  if (SIMULATION_TYPE == 3) then
+    if( APPROXIMATE_HESS_KL ) then
+      deallocate(hess_kl_crust_mantle)
+    endif
+    deallocate(beta_kl_outer_core)
+  endif
+
+  ! movies
+  if(MOVIE_SURFACE .or. NOISE_TOMOGRAPHY /= 0 ) then
+    deallocate(store_val_x,store_val_y,store_val_z, &
+              store_val_ux,store_val_uy,store_val_uz)
+    if (MOVIE_SURFACE) then
+      deallocate(store_val_x_all,store_val_y_all,store_val_z_all, &
+            store_val_ux_all,store_val_uy_all,store_val_uz_all)
+    endif
+  endif
+  if(MOVIE_VOLUME) then
+    deallocate(nu_3dmovie)
+  endif
+
+  ! noise simulations
+  if ( NOISE_TOMOGRAPHY /= 0 ) then
+    deallocate(noise_sourcearray, &
+              normal_x_noise,normal_y_noise,normal_z_noise, &
+              mask_noise,noise_surface_movie)
+  endif
+
+  ! vtk visualization
+  if( VTK_MODE ) then
+    ! closes/cleans up vtk window
+    if(myrank == 0 ) call finish_vtkwindow()
+
+    ! frees memory
+    deallocate(vtkdata,vtkmask)
+    if( NPROCTOT_VAL > 1 ) then
+      deallocate(vtkdata_points_all,vtkdata_offset_all)
+      if( myrank == 0 ) deallocate(vtkdata_all)
+    endif
+  endif
+
+  ! close the main output file
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) 'End of the simulation'
+    write(IMAIN,*)
+    close(IMAIN)
+  endif
+
+  ! synchronize all the processes to make sure everybody has finished
+  call sync_all()
+
+  if (ADIOS_ENABLED) then
+    call adios_cleanup()
+  endif
+  end subroutine finalize_simulation

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/iterate_time.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/iterate_time.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/iterate_time.F90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,655 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine iterate_time()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+  use specfem_par_movie
+  implicit none
+
+  ! timing
+  double precision, external :: wtime
+
+!
+!   s t a r t   t i m e   i t e r a t i o n s
+!
+
+  ! synchronize all processes to make sure everybody is ready to start time loop
+  call sync_all()
+
+  if(myrank == 0) then
+    write(IMAIN,*)
+    write(IMAIN,*) 'Starting time iteration loop...'
+    write(IMAIN,*)
+  endif
+
+  ! create an empty file to monitor the start of the simulation
+  if(myrank == 0) then
+    open(unit=IOUT,file=trim(OUTPUT_FILES)//'/starttimeloop.txt',status='unknown',action='write')
+    write(IOUT,*) 'hello, starting time loop'
+    close(IOUT)
+  endif
+
+  ! initialize variables for writing seismograms
+  seismo_offset = it_begin-1
+  seismo_current = 0
+
+#ifdef _HANDOPT
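+  ! remainders of the array sizes with respect to the unrolling strides: the hand-optimized
+  ! update loops below first handle these leftover points, then run in fixed-size strides (steps of 3)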
+  imodulo_NGLOB_CRUST_MANTLE = mod(NGLOB_CRUST_MANTLE,3)
+  imodulo_NGLOB_CRUST_MANTLE4 = mod(NGLOB_CRUST_MANTLE,4)
+  imodulo_NGLOB_INNER_CORE = mod(NGLOB_INNER_CORE,3)
+  imodulo_NGLOB_OUTER_CORE = mod(NGLOB_OUTER_CORE,3)
+#endif
+
+  ! get MPI starting time
+  time_start = wtime()
+
+  ! *********************************************************
+  ! ************* MAIN LOOP OVER THE TIME STEPS *************
+  ! *********************************************************
+
+  do it = it_begin,it_end
+
+    ! simulation status output and stability check
+    if(mod(it,NTSTEP_BETWEEN_OUTPUT_INFO) == 0 .or. it == 5 .or. it == NSTEP) then
+      call it_check_stability()
+    endif
+
+    ! update displacement using Newmark time scheme
+    call it_update_displacement_scheme()
+
+    ! acoustic solver for outer core
+    ! (needs to be done first, before elastic one)
+    call compute_forces_acoustic()
+
+    ! elastic solver for crust/mantle and inner core
+    call compute_forces_elastic()
+
+    ! restores last time snapshot saved for backward/reconstruction of wavefields
+    ! note: this is done here after the Newmark time scheme, otherwise the indexing for sources
+    !          and adjoint sources will become more complicated
+    !          that is, the index it for adjoint sources will match index NSTEP - 1 for the backward/reconstructed wavefields
+    if( SIMULATION_TYPE == 3 .and. it == 1 ) then
+      call read_forward_arrays()
+    endif
+
+    ! write the seismograms with time shift
+    if( nrec_local > 0 .or. ( WRITE_SEISMOGRAMS_BY_MASTER .and. myrank == 0 ) ) then
+      call write_seismograms()
+    endif
+
+    ! adjoint simulations: kernels
+    if( SIMULATION_TYPE == 3 ) then
+      call compute_kernels()
+    endif
+
+    ! outputs movie files
+    call write_movie_output()
+
+    ! first step of noise tomography, i.e., save a surface movie at every time step
+    ! modified from the subroutine 'write_movie_surface'
+    if( NOISE_TOMOGRAPHY == 1 ) then
+      call noise_save_surface_movie()
+    endif
+
+    ! updates vtk window
+    if( VTK_MODE ) then
+      call it_update_vtkwindow()
+    endif
+
+  enddo   ! end of main time loop
+
+  !
+  !---- end of time iteration loop
+  !
+
+  call it_print_elapsed_time()
+
+  ! Transfer fields from GPU card to host for further analysis
+  if(GPU_MODE) call it_transfer_from_GPU()
+
+  end subroutine iterate_time
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine it_update_displacement_scheme()
+
+! explicit Newmark time scheme with acoustic & elastic domains:
+! (see e.g. Hughes, 1987; Chaljub et al., 2003)
+!
+! chi(t+delta_t) = chi(t) + delta_t chi_dot(t) + 1/2 delta_t**2 chi_dot_dot(t)
+! chi_dot(t+delta_t) = chi_dot(t) + 1/2 delta_t chi_dot_dot(t) + 1/2 delta_t chi_dot_dot(t+delta_t)
+! chi_dot_dot(t+delta_t) = 1/M_acoustic ( -K_acoustic chi(t+delta_t) + B_acoustic u(t+delta_t) + f(t+delta_t) )
+!
+! u(t+delta_t) = u(t) + delta_t  v(t) + 1/2  delta_t**2 a(t)
+! v(t+delta_t) = v(t) + 1/2 delta_t a(t) + 1/2 delta_t a(t+delta_t)
+! a(t+delta_t) = 1/M_elastic ( -K_elastic u(t+delta_t) + B_elastic chi_dot_dot(t+delta_t) + f(t+delta_t) )
+!
+! where
+!   chi, chi_dot, chi_dot_dot are acoustic (fluid) potentials ( dotted with respect to time)
+!   u, v, a are displacement, velocity & acceleration
+!   M is mass matrix, K stiffness matrix and B boundary term for acoustic/elastic domains
+!   f denotes a source term (acoustic/elastic)
+!
+! note that this stage calculates the predictor terms
+!
+!   for the
+!   potential: chi_dot(t+delta_t) still requires + 1/2 delta_t chi_dot_dot(t+delta_t),
+!                                   which is added at a later stage (corrector) once chi_dot_dot(t+delta_t) has been calculated
+!   and similarly the
+!   velocity: v(t+delta_t) still requires  + 1/2 delta_t a(t+delta_t),
+!                                   which is added at a later stage once a(t+delta_t) has been calculated
+! also:
+!   boundary term B_elastic requires chi_dot_dot(t+delta)
+!                                   thus chi_dot_dot has to be updated first before the elastic boundary term is considered
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+  implicit none
+
+  ! local parameters
+  integer :: i
+
+  ! updates wavefields
+  if( .not. GPU_MODE) then
+  ! on CPU
+
+    ! Newmark time scheme update
+#ifdef _HANDOPT_NEWMARK
+! way 2:
+! A common technique in computational science to improve pipelining is loop unrolling.
+!
+! we access NDIM=3 components on each line,
+! that is, one unrolled iteration has to keep
+! 3 * (NDIM displ_ + NDIM veloc_ + NDIM accel_) values plus deltat, deltatsqover2, ... in registers;
+! in most cases a real (CUSTOM_REAL) value has 4 bytes,
+! so assuming a cache line size of about 128 bytes we unroll here in steps of 3 (29 reals, i.e. 116 bytes)
+! rather than in steps of 4
+    ! mantle
+    if(imodulo_NGLOB_CRUST_MANTLE >= 1) then
+      do i = 1,imodulo_NGLOB_CRUST_MANTLE
+        displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+          + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
+
+        veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
+          + deltatover2*accel_crust_mantle(:,i)
+
+        accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+      enddo
+    endif
+    do i = imodulo_NGLOB_CRUST_MANTLE+1,NGLOB_CRUST_MANTLE, 3 ! in steps of 3
+      displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+        + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
+      displ_crust_mantle(:,i+1) = displ_crust_mantle(:,i+1) &
+        + deltat*veloc_crust_mantle(:,i+1) + deltatsqover2*accel_crust_mantle(:,i+1)
+      displ_crust_mantle(:,i+2) = displ_crust_mantle(:,i+2) &
+        + deltat*veloc_crust_mantle(:,i+2) + deltatsqover2*accel_crust_mantle(:,i+2)
+
+
+      veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
+        + deltatover2*accel_crust_mantle(:,i)
+      veloc_crust_mantle(:,i+1) = veloc_crust_mantle(:,i+1) &
+        + deltatover2*accel_crust_mantle(:,i+1)
+      veloc_crust_mantle(:,i+2) = veloc_crust_mantle(:,i+2) &
+        + deltatover2*accel_crust_mantle(:,i+2)
+
+      ! set acceleration to zero
+      ! note: we zero the acceleration inside this loop since it has already been read into the cache;
+      !           zeroing it in a separate loop would require reading it in again,
+      !           which would make this step more expensive
+      accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+      accel_crust_mantle(:,i+1) = 0._CUSTOM_REAL
+      accel_crust_mantle(:,i+2) = 0._CUSTOM_REAL
+    enddo
+
+    ! outer core
+    do i=1,NGLOB_OUTER_CORE
+      displ_outer_core(i) = displ_outer_core(i) &
+        + deltat*veloc_outer_core(i) + deltatsqover2*accel_outer_core(i)
+
+      veloc_outer_core(i) = veloc_outer_core(i) &
+        + deltatover2*accel_outer_core(i)
+
+      accel_outer_core(i) = 0._CUSTOM_REAL
+    enddo
+
+    ! inner core
+    if(imodulo_NGLOB_INNER_CORE >= 1) then
+      do i = 1,imodulo_NGLOB_INNER_CORE
+        displ_inner_core(:,i) = displ_inner_core(:,i) &
+          + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
+
+        veloc_inner_core(:,i) = veloc_inner_core(:,i) &
+          + deltatover2*accel_inner_core(:,i)
+
+        accel_inner_core(:,i) = 0._CUSTOM_REAL
+      enddo
+    endif
+    do i = imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE, 3 ! in steps of 3
+      displ_inner_core(:,i) = displ_inner_core(:,i) &
+        + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
+      displ_inner_core(:,i+1) = displ_inner_core(:,i+1) &
+        + deltat*veloc_inner_core(:,i+1) + deltatsqover2*accel_inner_core(:,i+1)
+      displ_inner_core(:,i+2) = displ_inner_core(:,i+2) &
+        + deltat*veloc_inner_core(:,i+2) + deltatsqover2*accel_inner_core(:,i+2)
+
+
+      veloc_inner_core(:,i) = veloc_inner_core(:,i) &
+        + deltatover2*accel_inner_core(:,i)
+      veloc_inner_core(:,i+1) = veloc_inner_core(:,i+1) &
+        + deltatover2*accel_inner_core(:,i+1)
+      veloc_inner_core(:,i+2) = veloc_inner_core(:,i+2) &
+        + deltatover2*accel_inner_core(:,i+2)
+
+      accel_inner_core(:,i) = 0._CUSTOM_REAL
+      accel_inner_core(:,i+1) = 0._CUSTOM_REAL
+      accel_inner_core(:,i+2) = 0._CUSTOM_REAL
+    enddo
+#else
+! way 1:
+    ! mantle
+    do i=1,NGLOB_CRUST_MANTLE
+      displ_crust_mantle(:,i) = displ_crust_mantle(:,i) &
+        + deltat*veloc_crust_mantle(:,i) + deltatsqover2*accel_crust_mantle(:,i)
+      veloc_crust_mantle(:,i) = veloc_crust_mantle(:,i) &
+        + deltatover2*accel_crust_mantle(:,i)
+      accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+    enddo
+    ! outer core
+    do i=1,NGLOB_OUTER_CORE
+      displ_outer_core(i) = displ_outer_core(i) &
+        + deltat*veloc_outer_core(i) + deltatsqover2*accel_outer_core(i)
+      veloc_outer_core(i) = veloc_outer_core(i) &
+        + deltatover2*accel_outer_core(i)
+      accel_outer_core(i) = 0._CUSTOM_REAL
+    enddo
+    ! inner core
+    do i=1,NGLOB_INNER_CORE
+      displ_inner_core(:,i) = displ_inner_core(:,i) &
+        + deltat*veloc_inner_core(:,i) + deltatsqover2*accel_inner_core(:,i)
+      veloc_inner_core(:,i) = veloc_inner_core(:,i) &
+        + deltatover2*accel_inner_core(:,i)
+      accel_inner_core(:,i) = 0._CUSTOM_REAL
+    enddo
+#endif
+
+    ! backward field
+    if (SIMULATION_TYPE == 3) then
+
+#ifdef _HANDOPT_NEWMARK
+! way 2:
+      ! mantle
+      if(imodulo_NGLOB_CRUST_MANTLE >= 1) then
+        do i=1,imodulo_NGLOB_CRUST_MANTLE
+          b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
+            + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
+          b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
+            + b_deltatover2*b_accel_crust_mantle(:,i)
+          b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+        enddo
+      endif
+      do i=imodulo_NGLOB_CRUST_MANTLE+1,NGLOB_CRUST_MANTLE,3
+        b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
+          + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
+        b_displ_crust_mantle(:,i+1) = b_displ_crust_mantle(:,i+1) &
+          + b_deltat*b_veloc_crust_mantle(:,i+1) + b_deltatsqover2*b_accel_crust_mantle(:,i+1)
+        b_displ_crust_mantle(:,i+2) = b_displ_crust_mantle(:,i+2) &
+          + b_deltat*b_veloc_crust_mantle(:,i+2) + b_deltatsqover2*b_accel_crust_mantle(:,i+2)
+
+
+        b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
+          + b_deltatover2*b_accel_crust_mantle(:,i)
+        b_veloc_crust_mantle(:,i+1) = b_veloc_crust_mantle(:,i+1) &
+          + b_deltatover2*b_accel_crust_mantle(:,i+1)
+        b_veloc_crust_mantle(:,i+2) = b_veloc_crust_mantle(:,i+2) &
+          + b_deltatover2*b_accel_crust_mantle(:,i+2)
+
+        b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+        b_accel_crust_mantle(:,i+1) = 0._CUSTOM_REAL
+        b_accel_crust_mantle(:,i+2) = 0._CUSTOM_REAL
+      enddo
+
+      ! outer core
+      do i=1,NGLOB_OUTER_CORE
+        b_displ_outer_core(i) = b_displ_outer_core(i) &
+          + b_deltat*b_veloc_outer_core(i) + b_deltatsqover2*b_accel_outer_core(i)
+        b_veloc_outer_core(i) = b_veloc_outer_core(i) &
+          + b_deltatover2*b_accel_outer_core(i)
+        b_accel_outer_core(i) = 0._CUSTOM_REAL
+      enddo
+
+      ! inner core
+      if(imodulo_NGLOB_INNER_CORE >= 1) then
+        do i=1,imodulo_NGLOB_INNER_CORE
+          b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
+            + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
+          b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
+            + b_deltatover2*b_accel_inner_core(:,i)
+          b_accel_inner_core(:,i) = 0._CUSTOM_REAL
+        enddo
+      endif
+      do i=imodulo_NGLOB_INNER_CORE+1,NGLOB_INNER_CORE,3
+        b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
+          + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
+        b_displ_inner_core(:,i+1) = b_displ_inner_core(:,i+1) &
+          + b_deltat*b_veloc_inner_core(:,i+1) + b_deltatsqover2*b_accel_inner_core(:,i+1)
+        b_displ_inner_core(:,i+2) = b_displ_inner_core(:,i+2) &
+          + b_deltat*b_veloc_inner_core(:,i+2) + b_deltatsqover2*b_accel_inner_core(:,i+2)
+
+        b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
+          + b_deltatover2*b_accel_inner_core(:,i)
+        b_veloc_inner_core(:,i+1) = b_veloc_inner_core(:,i+1) &
+          + b_deltatover2*b_accel_inner_core(:,i+1)
+        b_veloc_inner_core(:,i+2) = b_veloc_inner_core(:,i+2) &
+          + b_deltatover2*b_accel_inner_core(:,i+2)
+
+        b_accel_inner_core(:,i) = 0._CUSTOM_REAL
+        b_accel_inner_core(:,i+1) = 0._CUSTOM_REAL
+        b_accel_inner_core(:,i+2) = 0._CUSTOM_REAL
+      enddo
+#else
+! way 1:
+      ! mantle
+      do i=1,NGLOB_CRUST_MANTLE
+        b_displ_crust_mantle(:,i) = b_displ_crust_mantle(:,i) &
+          + b_deltat*b_veloc_crust_mantle(:,i) + b_deltatsqover2*b_accel_crust_mantle(:,i)
+        b_veloc_crust_mantle(:,i) = b_veloc_crust_mantle(:,i) &
+          + b_deltatover2*b_accel_crust_mantle(:,i)
+        b_accel_crust_mantle(:,i) = 0._CUSTOM_REAL
+      enddo
+      ! outer core
+      do i=1,NGLOB_OUTER_CORE
+        b_displ_outer_core(i) = b_displ_outer_core(i) &
+          + b_deltat*b_veloc_outer_core(i) + b_deltatsqover2*b_accel_outer_core(i)
+        b_veloc_outer_core(i) = b_veloc_outer_core(i) &
+          + b_deltatover2*b_accel_outer_core(i)
+        b_accel_outer_core(i) = 0._CUSTOM_REAL
+      enddo
+      ! inner core
+      do i=1,NGLOB_INNER_CORE
+        b_displ_inner_core(:,i) = b_displ_inner_core(:,i) &
+          + b_deltat*b_veloc_inner_core(:,i) + b_deltatsqover2*b_accel_inner_core(:,i)
+        b_veloc_inner_core(:,i) = b_veloc_inner_core(:,i) &
+          + b_deltatover2*b_accel_inner_core(:,i)
+        b_accel_inner_core(:,i) = 0._CUSTOM_REAL
+      enddo
+#endif
+    endif ! SIMULATION_TYPE == 3
+  else
+    ! on GPU
+    ! Includes SIM_TYPE 1 & 3
+
+    ! outer core region
+    call it_update_displacement_oc_cuda(Mesh_pointer, &
+                                       deltat, deltatsqover2, deltatover2, &
+                                       b_deltat, b_deltatsqover2, b_deltatover2)
+    ! inner core region
+    call it_update_displacement_ic_cuda(Mesh_pointer, &
+                                       deltat, deltatsqover2, deltatover2, &
+                                       b_deltat, b_deltatsqover2, b_deltatover2)
+
+    ! crust/mantle region
+    call it_update_displacement_cm_cuda(Mesh_pointer, &
+                                       deltat, deltatsqover2, deltatover2, &
+                                       b_deltat, b_deltatsqover2, b_deltatover2)
+  endif
+
+  end subroutine it_update_displacement_scheme
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine it_print_elapsed_time()
+
+  use specfem_par
+  implicit none
+
+  ! local parameters
+  integer :: ihours,iminutes,iseconds,int_tCPU
+  ! timing
+  double precision, external :: wtime
+
+  if(myrank == 0) then
+     ! elapsed time since beginning of the simulation
+     tCPU = wtime() - time_start
+
+     int_tCPU = int(tCPU)
+     ihours = int_tCPU / 3600
+     iminutes = (int_tCPU - 3600*ihours) / 60
+     iseconds = int_tCPU - 3600*ihours - 60*iminutes
+     write(IMAIN,*) 'Time-Loop Complete. Timing info:'
+     write(IMAIN,*) 'Total elapsed time in seconds = ',tCPU
+     write(IMAIN,"(' Total elapsed time in hh:mm:ss = ',i4,' h ',i2.2,' m ',i2.2,' s')") ihours,iminutes,iseconds
+  endif
+  end subroutine it_print_elapsed_time
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine it_check_stability()
+
+! computes the maximum of the norm of the displacement
+! in all the slices using an MPI reduction
+! and output timestamp file to check that simulation is running fine
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+  use specfem_par_movie
+  implicit none
+
+  ! compute the maximum of the norm of the displacement
+  ! in all the slices using an MPI reduction
+  ! and output timestamp file to check that simulation is running fine
+  call check_simulation_stability(it,displ_crust_mantle,displ_inner_core,displ_outer_core, &
+                        b_displ_crust_mantle,b_displ_inner_core,b_displ_outer_core, &
+                        eps_trace_over_3_crust_mantle, &
+                        epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+                        epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+                        SIMULATION_TYPE,OUTPUT_FILES,time_start,DT,t0,NSTEP, &
+                        myrank)
+
+  ! debug output
+  !if( maxval(displ_crust_mantle(1,:)**2 + &
+  !                displ_crust_mantle(2,:)**2 + displ_crust_mantle(3,:)**2) > 1.e4 ) then
+  !  print*,'slice',myrank
+  !  print*,'  crust_mantle displ:', maxval(displ_crust_mantle(1,:)), &
+  !           maxval(displ_crust_mantle(2,:)),maxval(displ_crust_mantle(3,:))
+  !  print*,'  indxs: ',maxloc( displ_crust_mantle(1,:)),maxloc( displ_crust_mantle(2,:)),maxloc( displ_crust_mantle(3,:))
+  !  indx = maxloc( displ_crust_mantle(3,:) )
+  !  rval = xstore_crust_mantle(indx(1))
+  !  thetaval = ystore_crust_mantle(indx(1))
+  !  phival = zstore_crust_mantle(indx(1))
+  !  !thetaval = PI_OVER_TWO-datan(1.006760466d0*dcos(dble(thetaval))/dmax1(TINYVAL,dsin(dble(thetaval))))
+  !  print*,'r/lat/lon:',rval*R_EARTH_KM,90.0-thetaval*180./PI,phival*180./PI
+  !  call rthetaphi_2_xyz(rval,thetaval,phival,xstore_crust_mantle(indx(1)),&
+  !                     ystore_crust_mantle(indx(1)),zstore_crust_mantle(indx(1)))
+  !  print*,'x/y/z:',rval,thetaval,phival
+  !  call exit_MPI(myrank,'error stability')
+  !endif
+
+  end subroutine it_check_stability
+
+!
+!-------------------------------------------------------------------------------------------------
+!
+
+  subroutine it_transfer_from_GPU()
+
+! transfers fields on GPU back onto CPU
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+  implicit none
+
+  ! to store forward wave fields
+  if (SIMULATION_TYPE == 1 .and. SAVE_FORWARD) then
+
+    call transfer_fields_cm_from_device(NDIM*NGLOB_CRUST_MANTLE, &
+                                    displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle, &
+                                    Mesh_pointer)
+    call transfer_fields_ic_from_device(NDIM*NGLOB_INNER_CORE, &
+                                    displ_inner_core,veloc_inner_core,accel_inner_core, &
+                                    Mesh_pointer)
+    call transfer_fields_oc_from_device(NGLOB_OUTER_CORE, &
+                                    displ_outer_core,veloc_outer_core,accel_outer_core, &
+                                    Mesh_pointer)
+
+    call transfer_strain_cm_from_device(Mesh_pointer,eps_trace_over_3_crust_mantle, &
+                                    epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle, &
+                                    epsilondev_xy_crust_mantle,epsilondev_xz_crust_mantle, &
+                                    epsilondev_yz_crust_mantle)
+    call transfer_strain_ic_from_device(Mesh_pointer,eps_trace_over_3_inner_core, &
+                                    epsilondev_xx_inner_core,epsilondev_yy_inner_core, &
+                                    epsilondev_xy_inner_core,epsilondev_xz_inner_core, &
+                                    epsilondev_yz_inner_core)
+
+    if (ROTATION_VAL) then
+      call transfer_rotation_from_device(Mesh_pointer,A_array_rotation,B_array_rotation)
+    endif
+
+    ! note: for kernel simulations (SIMULATION_TYPE == 3), attenuation is
+    !          only mimicking effects on phase shifts, but not on amplitudes.
+    !          flag USE_ATTENUATION_MIMIC will have to be set to true in this case.
+    !
+    ! arrays b_R_xx, ... are not used when USE_ATTENUATION_MIMIC is set,
+    ! therefore no need to transfer arrays from GPU to CPU
+    !if (ATTENUATION) then
+    !endif
+
+  else if (SIMULATION_TYPE == 3) then
+    ! to store kernels
+    call transfer_kernels_oc_to_host(Mesh_pointer, &
+                                     rho_kl_outer_core,&
+                                     alpha_kl_outer_core,NSPEC_OUTER_CORE)
+    call transfer_kernels_cm_to_host(Mesh_pointer, &
+                                     rho_kl_crust_mantle, &
+                                     alpha_kl_crust_mantle, &
+                                     beta_kl_crust_mantle, &
+                                     cijkl_kl_crust_mantle,NSPEC_CRUST_MANTLE)
+    call transfer_kernels_ic_to_host(Mesh_pointer, &
+                                     rho_kl_inner_core, &
+                                     alpha_kl_inner_core, &
+                                     beta_kl_inner_core,NSPEC_INNER_CORE)
+
+    ! specific noise strength kernel
+    if( NOISE_TOMOGRAPHY == 3 ) then
+      call transfer_kernels_noise_to_host(Mesh_pointer,Sigma_kl_crust_mantle,NSPEC_CRUST_MANTLE)
+    endif
+
+    ! approximate Hessian for preconditioning kernels
+    if ( APPROXIMATE_HESS_KL ) then
+      call transfer_kernels_hess_cm_tohost(Mesh_pointer,hess_kl_crust_mantle,NSPEC_CRUST_MANTLE)
+    endif
+  endif
+
+  ! frees allocated memory on GPU
+  call prepare_cleanup_device(Mesh_pointer,NCHUNKS_VAL)
+
+  end subroutine it_transfer_from_GPU
+
+!=====================================================================
+
+
+  subroutine it_update_vtkwindow()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_movie
+
+  implicit none
+
+  real :: currenttime
+  integer :: iglob,inum
+  real(kind=CUSTOM_REAL),dimension(1):: dummy
+
+  ! vtk rendering at frame interval
+  if( mod(it,NTSTEP_BETWEEN_FRAMES) == 0 ) then
+
+    ! user output
+    !if( myrank == 0 ) print*,"  vtk rendering..."
+
+    ! updates time
+    currenttime = sngl((it-1)*DT-t0)
+
+    ! transfers fields from GPU to host
+    if( GPU_MODE ) then
+      !if( myrank == 0 ) print*,"  vtk: transferring velocity from gpu"
+      call transfer_veloc_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,veloc_crust_mantle,Mesh_pointer)
+    endif
+
+    ! updates wavefield
+    !if( myrank == 0 ) print*,"  vtk: it = ",it," out of ",it_end," - norm of velocity field"
+    inum = 0
+    vtkdata(:) = 0.0
+    do iglob = 1,NGLOB_CRUST_MANTLE
+      if( vtkmask(iglob) .eqv. .true. ) then
+        inum = inum + 1
+        ! stores norm of velocity vector
+        vtkdata(inum) = sqrt(veloc_crust_mantle(1,iglob)**2 &
+                           + veloc_crust_mantle(2,iglob)**2 &
+                           + veloc_crust_mantle(3,iglob)**2)
+      endif
+    enddo
+
+    ! updates for multiple MPI processes
+    if( NPROCTOT_VAL > 1 ) then
+      if( myrank == 0 ) then
+        ! gather data
+        call gatherv_all_cr(vtkdata,size(vtkdata),&
+                            vtkdata_all,vtkdata_points_all,vtkdata_offset_all, &
+                            vtkdata_numpoints_all,NPROCTOT_VAL)
+        ! updates vtk window
+        call visualize_vtkdata(it,currenttime,vtkdata_all)
+      else
+        ! all other processes just send data
+        call gatherv_all_cr(vtkdata,size(vtkdata),&
+                            dummy,vtkdata_points_all,vtkdata_offset_all, &
+                            1,NPROCTOT_VAL)
+      endif
+    else
+      ! serial run
+      ! updates vtk window
+      call visualize_vtkdata(it,currenttime,vtkdata)
+    endif
+
+  endif
+
+  end subroutine it_update_vtkwindow
+
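
For reference, the predictor/corrector split documented in it_update_displacement_scheme() above reduces, for a single scalar degree of freedom, to the following minimal sketch. It is illustrative only: the stiffness k, the unit mass and the 1000-step loop are made up for the example, while deltat, deltatover2 and deltatsqover2 mirror the solver variables.

  program newmark_1dof
    implicit none
    double precision, parameter :: deltat = 0.01d0, k = 4.d0
    double precision :: deltatover2, deltatsqover2, u, v, a
    integer :: it
    deltatover2   = 0.5d0 * deltat
    deltatsqover2 = 0.5d0 * deltat * deltat
    ! initial condition: unit displacement at rest, acceleration from -k*u (unit mass)
    u = 1.d0; v = 0.d0; a = -k*u
    do it = 1,1000
      ! predictor, as in the crust/mantle, outer core and inner core loops above
      u = u + deltat*v + deltatsqover2*a
      v = v + deltatover2*a
      a = 0.d0
      ! "force" stage: -K u divided by the (unit) mass matrix gives the new acceleration
      a = -k*u
      ! corrector: second half of the velocity update, once the new acceleration is known
      v = v + deltatover2*a
    enddo
    print *,'final displacement = ',u
  end program newmark_1dof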

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_arrays_solver_adios.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_arrays_solver_adios.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_arrays_solver_adios.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,455 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!===============================================================================
+!> \brief Read adios arrays created by the mesher (file: regX_solver_data.bp)
+subroutine read_arrays_solver_adios(iregion_code,myrank, &
+              nspec,nglob,nglob_xy, &
+              nspec_iso,nspec_tiso,nspec_ani, &
+              rho_vp,rho_vs,xstore,ystore,zstore, &
+              xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz, &
+              rhostore, kappavstore,muvstore,kappahstore,muhstore,eta_anisostore, &
+              c11store,c12store,c13store,c14store,c15store,c16store,c22store, &
+              c23store,c24store,c25store,c26store,c33store,c34store,c35store, &
+              c36store,c44store,c45store,c46store,c55store,c56store,c66store, &
+              ibool,idoubling,ispec_is_tiso, &
+              rmassx,rmassy,rmassz,rmass_ocean_load, &
+              READ_KAPPA_MU,READ_TISO, &
+              ABSORBING_CONDITIONS,LOCAL_PATH)
+
+  use mpi 
+  use adios_read_mod
+  implicit none
+
+  include "constants.h"
+  include "OUTPUT_FILES/values_from_mesher.h"
+
+  integer :: iregion_code,myrank
+  integer :: nspec,nglob,nglob_xy
+  integer :: nspec_iso,nspec_tiso,nspec_ani
+
+  ! Stacey
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,nspec):: rho_vp,rho_vs
+
+  real(kind=CUSTOM_REAL), dimension(nglob) :: xstore,ystore,zstore
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec) :: &
+    xix,xiy,xiz,etax,etay,etaz,gammax,gammay,gammaz
+
+  ! material properties
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,nspec_iso) :: &
+    rhostore,kappavstore,muvstore
+
+  ! additional arrays for anisotropy stored only where needed to save memory
+  real(kind=CUSTOM_REAL),dimension(NGLLX,NGLLY,NGLLZ,nspec_tiso) :: &
+    kappahstore,muhstore,eta_anisostore
+
+  ! additional arrays for full anisotropy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,nspec_ani) :: &
+    c11store,c12store,c13store,c14store,c15store,c16store, &
+    c22store,c23store,c24store,c25store,c26store,c33store,c34store, &
+    c35store,c36store,c44store,c45store,c46store,c55store,c56store,c66store
+
+  ! global addressing
+  integer,dimension(NGLLX,NGLLY,NGLLZ,nspec) :: ibool
+  integer, dimension(nspec) :: idoubling
+  logical, dimension(nspec) :: ispec_is_tiso
+
+  ! mass matrices and additional ocean load mass matrix
+  real(kind=CUSTOM_REAL), dimension(nglob_xy) :: rmassx,rmassy
+  real(kind=CUSTOM_REAL), dimension(nglob)    :: rmassz
+  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE_OCEANS) :: rmass_ocean_load
+
+  ! flags to know if we should read Vs and anisotropy arrays
+  logical :: READ_KAPPA_MU,READ_TISO,ABSORBING_CONDITIONS
+
+  character(len=150) :: LOCAL_PATH, file_name
+
+  ! local parameters
+  integer :: ierr, comm, lnspec, lnglob, local_dim
+  ! processor identification
+  character(len=150) :: prname
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+  integer :: vars_count, attrs_count, current_step, last_step, vsteps
+  character(len=128), dimension(:), allocatable :: adios_names 
+  integer(kind=8), dimension(1) :: start, count
+
+  integer(kind=8), dimension(256),target :: selections
+  integer :: sel_num, i
+  integer(kind=8), pointer :: sel => null()
+
+  sel_num = 0
+
+  ! create a prefix for the file name such as LOCAL_PATH/regX_
+  call create_name_database_adios(prname, iregion_code, LOCAL_PATH)
+
+  ! Append the actual file name.
+  file_name = trim(prname) // "solver_data.bp"
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+
+  ! Setup the ADIOS library to read the file
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! read coordinates of the mesh
+  sel_num = sel_num+1
+  sel => selections(sel_num)
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "nspec", 0, 1, &
+     lnspec, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nglob", 0, 1, &
+     lnglob, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec", lnspec, adios_err)
+  !call adios_get_scalar(adios_handle, "nglob", lnglob, adios_err)
+  !call adios_perform_reads(adios_handle, adios_err)
+  !call check_adios_err(myrank,adios_err)
+
+
+  ! mesh coordinates
+  local_dim = nglob 
+  start(1) = local_dim*myrank; count(1) = local_dim
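+  ! each MPI rank reads its own contiguous block of local_dim entries from the global array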
+  sel_num = sel_num+1
+  sel => selections(sel_num)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "xstore/array", 0, 1, &
+      xstore, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "ystore/array", 0, 1, &
+      ystore, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "zstore/array", 0, 1, &
+      zstore, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "rmassz/array", 0, 1, &
+      rmassz, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !call adios_perform_reads(adios_handle, adios_err)
+  !call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * nspec_iso
+  start(1) = local_dim*myrank; count(1) = local_dim
+  sel_num = sel_num+1
+  sel => selections(sel_num)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "rhostore/array", 0, 1, &
+      rhostore, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "kappavstore/array", 0, 1, &
+      kappavstore, adios_err)
+  call check_adios_err(myrank,adios_err)
+  if(READ_KAPPA_MU) then 
+    call adios_schedule_read(adios_handle, sel, "muvstore/array", 0, 1, &
+        muvstore, adios_err)
+    call check_adios_err(myrank,adios_err)
+  endif
+
+  !call adios_perform_reads(adios_handle, adios_err)
+  !call check_adios_err(myrank,adios_err)
+
+  if(TRANSVERSE_ISOTROPY_VAL .and. READ_TISO) then
+    local_dim = NGLLX * NGLLY * NGLLZ * nspec_tiso
+    start(1) = local_dim*myrank; count(1) = local_dim
+    sel_num = sel_num+1
+    sel => selections(sel_num)
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "kappahstore/array", 0, 1, &
+        kappahstore, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "muhstore/array", 0, 1, &
+        muhstore, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "eta_anisostore/array", 0, 1, &
+        eta_anisostore, adios_err)
+    call check_adios_err(myrank,adios_err)
+  endif
+
+  !call adios_perform_reads(adios_handle, adios_err)
+  !call check_adios_err(myrank,adios_err)
+
+  local_dim = nspec
+  start(1) = local_dim*myrank; count(1) = local_dim
+  sel_num = sel_num+1
+  sel => selections(sel_num)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "idoubling/array", 0, 1, &
+      idoubling, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "ispec_is_tiso/array", 0, 1, &
+      ispec_is_tiso, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !call adios_perform_reads(adios_handle, adios_err)
+  !call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * nspec
+  start(1) = local_dim*myrank; count(1) = local_dim
+  sel_num = sel_num+1
+  sel => selections(sel_num)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibool/array", 0, 1, &
+      ibool, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "xixstore/array", 0, 1, &
+      xix, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "xiystore/array", 0, 1, &
+      xiy, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "xizstore/array", 0, 1, &
+      xiz, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "etaxstore/array", 0, 1, &
+      etax, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "etaystore/array", 0, 1, &
+      etay, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "etazstore/array", 0, 1, &
+      etaz, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "gammaxstore/array", 0, 1, &
+      gammax, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "gammaystore/array", 0, 1, &
+      gammay, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "gammazstore/array", 0, 1, &
+      gammaz, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !call adios_perform_reads(adios_handle, adios_err)
+  !call check_adios_err(myrank,adios_err)
+
+
+  if(ANISOTROPIC_INNER_CORE_VAL .and. iregion_code == IREGION_INNER_CORE) then
+    local_dim = NGLLX * NGLLY * NGLLZ * nspec_ani 
+    start(1) = local_dim*myrank; count(1) = local_dim
+    sel_num = sel_num+1
+    sel => selections(sel_num)
+    call adios_selection_boundingbox (sel , 1, start, count)
+
+    call adios_schedule_read(adios_handle, sel, "c11store/array", 0, 1, &
+        c11store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c12store/array", 0, 1, &
+        c12store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c13store/array", 0, 1, &
+        c13store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c33store/array", 0, 1, &
+        c33store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c44store/array", 0, 1, &
+        c44store, adios_err)
+    call check_adios_err(myrank,adios_err)
+  endif
+
+  if(ANISOTROPIC_3D_MANTLE_VAL .and. iregion_code == IREGION_CRUST_MANTLE) then
+    local_dim = NGLLX * NGLLY * NGLLZ * nspec_ani 
+    start(1) = local_dim*myrank; count(1) = local_dim
+    sel_num = sel_num+1
+    sel => selections(sel_num)
+    call adios_selection_boundingbox (sel , 1, start, count)
+
+    call adios_schedule_read(adios_handle, sel, "c11store/array", 0, 1, &
+        c11store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c12store/array", 0, 1, &
+        c12store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c13store/array", 0, 1, &
+        c13store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c14store/array", 0, 1, &
+        c14store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c15store/array", 0, 1, &
+        c15store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c16store/array", 0, 1, &
+        c16store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c22store/array", 0, 1, &
+        c22store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c23store/array", 0, 1, &
+        c23store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c24store/array", 0, 1, &
+        c24store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c25store/array", 0, 1, &
+        c25store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c26store/array", 0, 1, &
+        c26store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c33store/array", 0, 1, &
+        c33store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c34store/array", 0, 1, &
+        c34store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c35store/array", 0, 1, &
+        c35store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c36store/array", 0, 1, &
+        c36store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c44store/array", 0, 1, &
+        c44store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c45store/array", 0, 1, &
+        c45store, adios_err)
+    call adios_schedule_read(adios_handle, sel, "c46store/array", 0, 1, &
+        c46store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c55store/array", 0, 1, &
+        c55store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c56store/array", 0, 1, &
+        c56store, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "c66store/array", 0, 1, &
+        c66store, adios_err)
+    call check_adios_err(myrank,adios_err)
+  endif
+
+  !call adios_perform_reads(adios_handle, adios_err)
+  !call check_adios_err(myrank,adios_err)
+
+  ! Stacey
+  if(ABSORBING_CONDITIONS) then
+    local_dim = NGLLX * NGLLY * NGLLZ * nspec ! nspec_stacey in meshfem3D
+    start(1) = local_dim*myrank; count(1) = local_dim
+    sel_num = sel_num+1
+    sel => selections(sel_num)
+    call adios_selection_boundingbox (sel , 1, start, count)
+
+    if(iregion_code == IREGION_CRUST_MANTLE) then
+      call adios_schedule_read(adios_handle, sel, "rho_vp/array", 0, 1, &
+          rho_vp, adios_err)
+      call check_adios_err(myrank,adios_err)
+      call adios_schedule_read(adios_handle, sel, "rho_vs/array", 0, 1, &
+          rho_vs, adios_err)
+      call check_adios_err(myrank,adios_err)
+    else if(iregion_code == IREGION_OUTER_CORE) then
+      call adios_schedule_read(adios_handle, sel, "rho_vp/array", 0, 1, &
+          rho_vp, adios_err)
+      call check_adios_err(myrank,adios_err)
+    endif
+
+  endif
+
+  ! mass matrices
+  !
+  ! in the case of Stacey boundary conditions, a C*deltat/2 contribution is added to
+  ! the mass matrix on Stacey edges for the crust_mantle and outer_core regions
+  ! (but not for the inner_core region); the mass matrix must therefore be replaced by
+  ! three mass matrices that include the "C" damping matrix
+  !
+  ! if absorbing_conditions are not set or if NCHUNKS=6, only one mass matrix
+  ! is needed; for the sake of performance only the "rmassz" array is then filled,
+  ! and "rmassx" & "rmassy" are unused
+  !call adios_perform_reads(adios_handle, adios_err)
+  !call check_adios_err(myrank,adios_err)
+
+  if(NCHUNKS_VAL /= 6 .and. ABSORBING_CONDITIONS .and. &
+      iregion_code == IREGION_CRUST_MANTLE) then
+
+    local_dim = nglob_xy
+    start(1) = local_dim*myrank; count(1) = local_dim
+    sel_num = sel_num+1
+    sel => selections(sel_num)
+    call adios_selection_boundingbox (sel , 1, start, count)
+
+    call adios_schedule_read(adios_handle, sel, "rmassx/array", 0, 1, &
+        rmassx, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "rmassy/array", 0, 1, &
+        rmassy, adios_err)
+    call check_adios_err(myrank,adios_err)
+  endif
+
+  !call adios_perform_reads(adios_handle, adios_err)
+  !call check_adios_err(myrank,adios_err)
+
+  ! read additional ocean load mass matrix
+  if(OCEANS_VAL .and. iregion_code == IREGION_CRUST_MANTLE) then
+    local_dim = NGLOB_CRUST_MANTLE_OCEANS ! nglob_oceans
+    start(1) = local_dim*myrank; count(1) = local_dim
+    sel_num = sel_num+1
+    sel => selections(sel_num)
+    call adios_selection_boundingbox (sel , 1, start, count)
+
+    call adios_schedule_read(adios_handle, sel, "rmass_ocean_load/array", &
+        0, 1, rmass_ocean_load, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    !call adios_perform_reads(adios_handle, adios_err)
+    !call check_adios_err(myrank,adios_err)
+  endif
+
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+
+  ! Clean everything and close the ADIOS file
+  do i = 1, sel_num
+    sel => selections(i)
+    call adios_selection_delete(sel)
+  enddo
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call MPI_Barrier(comm, ierr)
+  ! checks dimensions
+  if( lnspec /= nspec ) then
+    print*,'error file dimension: nspec in file = ',lnspec, &
+        ' but nspec desired:',nspec
+    print*,'please check file ', file_name
+    call exit_mpi(myrank,'error dimensions in solver_data.bp')
+  endif
+  if( lnglob /= nglob ) then
+    print*,'error file dimension: nglob in file = ',lnglob, &
+        ' but nglob desired:',nglob
+    print*,'please check file ', file_name
+    call exit_mpi(myrank,'error dimensions in solver_data.bp')
+  endif
+
+end subroutine read_arrays_solver_adios
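
The ADIOS read sequence used throughout read_arrays_solver_adios() follows a fixed pattern: initialize the read method, open the file, define a per-rank bounding-box selection, schedule the reads, perform them, then close and finalize. The following condensed sketch of that pattern is illustrative only; the subroutine, its arguments and the variable name "myarray/array" are hypothetical, but the ADIOS calls are the ones used above.

  subroutine read_one_array_adios(myrank, nvalues, buffer, file_name)
    use mpi
    use adios_read_mod
    implicit none
    integer :: myrank, nvalues
    real, dimension(nvalues) :: buffer
    character(len=*) :: file_name
    integer :: comm, ierr, adios_err
    integer(kind=8) :: adios_handle, sel
    integer(kind=8), dimension(1) :: start, count

    call MPI_Comm_dup(MPI_COMM_WORLD, comm, ierr)
    call adios_read_init_method(ADIOS_READ_METHOD_BP, comm, "verbose=1", adios_err)
    call adios_read_open_file(adios_handle, file_name, 0, comm, adios_err)

    ! each rank reads its own contiguous block [myrank*nvalues, (myrank+1)*nvalues)
    start(1) = int(nvalues, kind=8) * myrank
    count(1) = nvalues
    call adios_selection_boundingbox(sel, 1, start, count)
    call adios_schedule_read(adios_handle, sel, "myarray/array", 0, 1, buffer, adios_err)

    ! reads are only executed here; do this before deleting or reusing the selection
    call adios_perform_reads(adios_handle, adios_err)

    call adios_selection_delete(sel)
    call adios_read_close(adios_handle, adios_err)
    call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
  end subroutine read_one_array_adios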

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_attenuation_adios.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_attenuation_adios.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_attenuation_adios.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,127 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+!===============================================================================
+!> \brief Read adios attenuation arrays created by the mesher
+!         (regX_attenuation.bp)
+subroutine read_attenuation_adios(myrank, prname, &
+   factor_common, scale_factor, tau_s, vx, vy, vz, vnspec, T_c_source)
+
+  use adios_read_mod
+  use specfem_par,only: ATTENUATION_VAL
+
+  implicit none
+
+  include 'constants.h'
+  include 'mpif.h'
+
+  integer :: myrank
+
+  integer :: vx,vy,vz,vnspec
+  double precision, dimension(vx,vy,vz,vnspec)       :: scale_factor
+  double precision, dimension(N_SLS,vx,vy,vz,vnspec) :: factor_common
+  double precision, dimension(N_SLS)                 :: tau_s
+
+  character(len=150) :: prname
+
+  ! local parameters
+  integer :: i,j,k,ispec,ier
+  double precision, dimension(N_SLS) :: tau_e, fc
+  double precision :: omsb, Q_mu, sf, T_c_source, scale_t
+  integer :: sizeprocs, comm, ierr
+  character(len=150) :: file_name
+  integer(kind=8) :: group_size_inc
+  integer :: local_dim, global_dim, offset
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid, sel
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+  integer :: vars_count, attrs_count, current_step, last_step, vsteps
+  character(len=128), dimension(:), allocatable :: adios_names 
+  integer(kind=8), dimension(1) :: start, count
+
+  ! checks if attenuation is on and anything to do
+  if( .not. ATTENUATION_VAL) return
+
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+
+  ! All of the following reads use the output parameters as their temporary arrays
+  ! use the filename to determine the actual contents of the read
+  file_name= trim(prname) // "attenuation.bp" 
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "T_c_source", 0, 1, &
+     T_c_source, adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = size (tau_s) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "tau_s/array", 0, 1, &
+    tau_s, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = size (factor_common) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "tau_e_store/array", 0, 1, &
+    factor_common, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = size (scale_factor) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "Qmu_store/array", 0, 1, &
+    scale_factor, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! Close ADIOS handler to the restart file.
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+end subroutine read_attenuation_adios

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_forward_arrays_adios.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_forward_arrays_adios.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_forward_arrays_adios.F90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,442 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!-------------------------------------------------------------------------------
+!> \file read_forward_arrays_adios.F90
+!! \brief Read saved forward arrays with the help of the ADIOS library.
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+!> \brief Read forward arrays from an ADIOS file.
+!> \note read_intermediate_forward_arrays_adios()
+!!       and read_forward_arrays_adios() are not factorized, because
+!!       the latter reads the bp file into "b_"-prefixed arrays.
+subroutine read_intermediate_forward_arrays_adios()
+  ! External imports
+  use mpi
+  use adios_read_mod
+  ! Internal imports
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+  ! Local parameters
+  integer :: sizeprocs, comm, ierr
+  character(len=150) :: file_name
+  integer(kind=8) :: group_size_inc
+  integer :: local_dim, global_dim, offset
+!  integer, parameter :: num_arrays = 9 ! TODO correct number 
+!  character(len=256), dimension(num_arrays) :: local_dims1, local_dims2, &
+!      global_dims1, global_dims2, offsets1, offsets2, array_name
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid, sel
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+  integer :: vars_count, attrs_count, current_step, last_step, vsteps
+  character(len=128), dimension(:), allocatable :: adios_names 
+  integer(kind=8), dimension(1) :: start, count
+
+
+  file_name = trim(LOCAL_TMP_PATH) // "/dump_all_arrays_adios.bp" 
+  call world_size(sizeprocs)
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+  
+
+  local_dim = NDIM * NGLOB_CRUST_MANTLE 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "displ_crust_mantle/array", 0, 1, &
+      displ_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "veloc_crust_mantle/array", 0, 1, &
+      veloc_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "accel_crust_mantle/array", 0, 1, &
+      accel_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! NOTE: perform reads before changing selection, otherwise it will segfault
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  
+  local_dim = NDIM * NGLOB_INNER_CORE
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "displ_inner_core/array", 0, 1, &
+      displ_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "veloc_inner_core/array", 0, 1, &
+      veloc_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "accel_inner_core/array", 0, 1, &
+      accel_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLOB_OUTER_CORE
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "displ_outer_core/array", 0, 1, &
+      displ_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "veloc_outer_core/array", 0, 1, &
+      veloc_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "accel_outer_core/array", 0, 1, &
+      accel_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_CRUST_MANTLE_STR_OR_ATT
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xx_crust_mantle/array",&
+      0, 1, epsilondev_xx_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_yy_crust_mantle/array",&
+      0, 1, epsilondev_yy_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xy_crust_mantle/array",&
+      0, 1, epsilondev_xy_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xz_crust_mantle/array",&
+      0, 1, epsilondev_xz_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_yz_crust_mantle/array",&
+      0, 1, epsilondev_yz_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_INNER_CORE_STR_OR_ATT
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xx_inner_core/array",&
+      0, 1, epsilondev_xx_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_yy_inner_core/array",&
+      0, 1, epsilondev_yy_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xy_inner_core/array",&
+      0, 1, epsilondev_xy_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xz_inner_core/array",&
+      0, 1, epsilondev_xz_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_yz_inner_core/array",&
+      0, 1, epsilondev_yz_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_OUTER_CORE_ROTATION
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "A_array_rotation/array", 0, 1, &
+      A_array_rotation, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "B_array_rotation/array", 0, 1, &
+      B_array_rotation, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = N_SLS*NGLLX*NGLLY*NGLLZ*NSPEC_CRUST_MANTLE_ATTENUAT
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "R_xx_crust_mantle/array", 0, 1, &
+      R_xx_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "R_yy_crust_mantle/array", 0, 1, &
+      R_yy_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "R_xy_crust_mantle/array", 0, 1, &
+      R_xy_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "R_xz_crust_mantle/array", 0, 1, &
+      R_xz_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "R_yz_crust_mantle/array", 0, 1, &
+      R_yz_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = N_SLS*NGLLX*NGLLY*NGLLZ*NSPEC_INNER_CORE_ATTENUATION
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "R_xx_inner_core/array", 0, 1, &
+      R_xx_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "R_yy_inner_core/array", 0, 1, &
+      R_yy_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "R_xy_inner_core/array", 0, 1, &
+      R_xy_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "R_xz_inner_core/array", 0, 1, &
+      R_xz_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "R_yz_inner_core/array", 0, 1, &
+      R_yz_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! Close the ADIOS handle on the restart file.
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call MPI_Barrier(comm, ierr)
+
+end subroutine read_intermediate_forward_arrays_adios
+
+!-------------------------------------------------------------------------------
+!> \brief Read forward arrays from an ADIOS file.
+!> \note read_intermediate_forward_arrays_adios()
+!!       and read_forward_arrays_adios() are not factorized, because
+!!       the latter reads the bp file into "b_"-prefixed arrays.
+subroutine read_forward_arrays_adios()
+  ! External imports
+  use mpi
+  use adios_read_mod
+  ! Internal imports
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+  ! Local parameters
+  integer :: sizeprocs, comm, ierr
+  character(len=150) :: file_name
+  integer(kind=8) :: group_size_inc
+  integer :: local_dim, global_dim, offset
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid, sel
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+  integer :: vars_count, attrs_count, current_step, last_step, vsteps
+  character(len=128), dimension(:), allocatable :: adios_names 
+  integer(kind=8), dimension(1) :: start, count
+
+
+  file_name = trim(LOCAL_TMP_PATH) // "/save_forward_arrays.bp" 
+  call world_size(sizeprocs)
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+  
+
+  local_dim = NDIM * NGLOB_CRUST_MANTLE 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "displ_crust_mantle/array", 0, 1, &
+      b_displ_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "veloc_crust_mantle/array", 0, 1, &
+      b_veloc_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "accel_crust_mantle/array", 0, 1, &
+      b_accel_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! NOTE: perform reads before changing selection, otherwise it will segfault
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  
+  local_dim = NDIM * NGLOB_INNER_CORE
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "displ_inner_core/array", 0, 1, &
+      b_displ_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "veloc_inner_core/array", 0, 1, &
+      b_veloc_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "accel_inner_core/array", 0, 1, &
+      b_accel_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLOB_OUTER_CORE
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "displ_outer_core/array", 0, 1, &
+      b_displ_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "veloc_outer_core/array", 0, 1, &
+      b_veloc_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "accel_outer_core/array", 0, 1, &
+      b_accel_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_CRUST_MANTLE_STR_OR_ATT
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xx_crust_mantle/array",&
+      0, 1, b_epsilondev_xx_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_yy_crust_mantle/array",&
+      0, 1, b_epsilondev_yy_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xy_crust_mantle/array",&
+      0, 1, b_epsilondev_xy_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xz_crust_mantle/array",&
+      0, 1, b_epsilondev_xz_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_yz_crust_mantle/array",&
+      0, 1, b_epsilondev_yz_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_INNER_CORE_STR_OR_ATT
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xx_inner_core/array",&
+      0, 1, b_epsilondev_xx_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_yy_inner_core/array",&
+      0, 1, b_epsilondev_yy_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xy_inner_core/array",&
+      0, 1, b_epsilondev_xy_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_xz_inner_core/array",&
+      0, 1, b_epsilondev_xz_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "epsilondev_yz_inner_core/array",&
+      0, 1, b_epsilondev_yz_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
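+  ! rotation and attenuation memory variables are only read back when the
+  ! corresponding physics is enabled, mirroring the conditional writes on the
+  ! save side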
+  if (ROTATION_VAL) then
+    local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_OUTER_CORE_ROTATION
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "A_array_rotation/array", 0, 1, &
+        b_A_array_rotation, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "B_array_rotation/array", 0, 1, &
+        b_B_array_rotation, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  endif
+
+  if (ATTENUATION_VAL) then
+    local_dim = N_SLS*NGLLX*NGLLY*NGLLZ*NSPEC_CRUST_MANTLE_ATTENUAT
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "R_xx_crust_mantle/array", 0, 1, &
+        b_R_xx_crust_mantle, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "R_yy_crust_mantle/array", 0, 1, &
+        b_R_yy_crust_mantle, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "R_xy_crust_mantle/array", 0, 1, &
+        b_R_xy_crust_mantle, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "R_xz_crust_mantle/array", 0, 1, &
+        b_R_xz_crust_mantle, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "R_yz_crust_mantle/array", 0, 1, &
+        b_R_yz_crust_mantle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    local_dim = N_SLS*NGLLX*NGLLY*NGLLZ*NSPEC_INNER_CORE_ATTENUATION
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "R_xx_inner_core/array", 0, 1, &
+        b_R_xx_inner_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "R_yy_inner_core/array", 0, 1, &
+        b_R_yy_inner_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "R_xy_inner_core/array", 0, 1, &
+        b_R_xy_inner_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "R_xz_inner_core/array", 0, 1, &
+        b_R_xz_inner_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "R_yz_inner_core/array", 0, 1, &
+        b_R_yz_inner_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  endif
+
+  ! Close the ADIOS handle on the restart file.
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call MPI_Barrier(comm, ierr)
+
+end subroutine read_forward_arrays_adios

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_mesh_databases_adios.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_mesh_databases_adios.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_mesh_databases_adios.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,1438 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+!===============================================================================
+!> \brief Read adios boundary arrays created by the mesher
+!!        (file: regX_boundary.bp)
+subroutine read_mesh_databases_coupling_adios()
+  use adios_read_mod
+
+! to couple mantle with outer core
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  include 'mpif.h'
+
+  ! local parameters
+  integer :: njunk1,njunk2,njunk3
+  integer :: sizeprocs, comm, ierr
+  character(len=150) :: file_name
+  integer(kind=8) :: group_size_inc
+  integer :: local_dim, global_dim, offset
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid, sel
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+  integer :: vars_count, attrs_count, current_step, last_step, vsteps
+  character(len=128), dimension(:), allocatable :: adios_names 
+  integer(kind=8), dimension(1) :: start, count
+
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
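+
+  ! the boundary arrays (nspec2D_* counts, ibelm_* element lists, normal_* and
+  ! jacobian2D_* surface data) are read region by region below: crust/mantle,
+  ! then outer core; the inner core only needs the counts and ibelm_* lists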
+
+  ! crust and mantle
+  ! create name of database
+  call create_name_database_adios(prname, IREGION_CRUST_MANTLE, LOCAL_PATH)
+  file_name= trim(prname) // "boundary.bp" 
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_xmin", 0, 1, &
+     nspec2D_xmin_crust_mantle, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_xmax", 0, 1, &
+     nspec2D_xmax_crust_mantle, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_ymin", 0, 1, &
+     nspec2D_ymin_crust_mantle, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_ymax", 0, 1, &
+     nspec2D_ymax_crust_mantle, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_xmin", &
+      !nspec2D_xmin_crust_mantle, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_xmax", &
+      !nspec2D_xmax_crust_mantle, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_ymin", &
+      !nspec2D_ymin_crust_mantle, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_ymax", &
+      !nspec2D_ymax_crust_mantle, adios_err)
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2DMAX_XMIN_XMAX_CM
+  local_dim = size (ibelm_xmin_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_xmin/array", 0, 1, &
+    ibelm_xmin_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "ibelm_xmax/array", 0, 1, &
+    ibelm_xmax_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2DMAX_YMIN_YMAX_CM
+  local_dim = size (ibelm_ymin_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_ymin/array", 0, 1, &
+    ibelm_ymin_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "ibelm_ymax/array", 0, 1, &
+    ibelm_ymax_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2D_BOTTOM_CM
+  local_dim = size (ibelm_bottom_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_bottom/array", 0, 1, &
+    ibelm_bottom_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2D_TOP_CM
+  local_dim = size (ibelm_top_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_top/array", 0, 1, &
+    ibelm_top_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NDIM*NGLLY*NGLLZ*NSPEC2DMAX_XMIN_XMAX_CM
+  local_dim = size (normal_xmin_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "normal_xmin/array", 0, 1, &
+    normal_xmin_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "normal_xmax/array", 0, 1, &
+    normal_xmax_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NDIM*NGLLX*NGLLZ*NSPEC2DMAX_YMIN_YMAX_CM
+  local_dim = size (normal_ymin_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "normal_ymin/array", 0, 1, &
+    normal_ymin_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "normal_ymax/array", 0, 1, &
+    normal_ymax_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NDIM*NGLLX*NGLLY*NSPEC2D_BOTTOM_CM
+  local_dim = size (ibelm_bottom_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "normal_bottom/array", 0, 1, &
+    normal_bottom_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NDIM*NGLLX*NGLLY*NSPEC2D_TOP_CM
+  local_dim = size (ibelm_top_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "normal_top/array", 0, 1, &
+    normal_top_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NGLLY*NGLLZ*NSPEC2DMAX_XMIN_XMAX_CM
+  local_dim = size (jacobian2D_xmin_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_xmin/array", 0, 1, &
+    jacobian2D_xmin_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_xmax/array", 0, 1, &
+    jacobian2D_xmax_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NGLLX*NGLLZ*NSPEC2DMAX_YMIN_YMAX_CM
+  local_dim = size (jacobian2D_ymin_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_ymin/array", 0, 1, &
+    jacobian2D_ymin_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_ymax/array", 0, 1, &
+    jacobian2D_ymax_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NGLLX*NGLLY*NSPEC2D_BOTTOM_CM
+  local_dim = size (jacobian2D_bottom_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_bottom/array", 0, 1, &
+    jacobian2D_bottom_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NGLLX*NGLLY*NSPEC2D_TOP_CM
+  local_dim = size (jacobian2D_top_crust_mantle) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_top/array", 0, 1, &
+    jacobian2D_top_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+! boundary parameters
+
+  ! Close the ADIOS handle on the crust/mantle boundary file.
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! read parameters to couple fluid and solid regions
+  !
+  ! outer core
+
+  ! create name of database
+  call create_name_database_adios(prname, IREGION_OUTER_CORE, LOCAL_PATH)
+  file_name= trim(prname) // "boundary.bp" 
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_xmin", 0, 1, &
+     nspec2D_xmin_outer_core, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_xmax", 0, 1, &
+     nspec2D_xmax_outer_core, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_ymin", 0, 1, &
+     nspec2D_ymin_outer_core, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_ymax", 0, 1, &
+     nspec2D_ymax_outer_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_xmin", &
+      !nspec2D_xmin_outer_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_xmax", &
+      !nspec2D_xmax_outer_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_ymin", &
+      !nspec2D_ymin_outer_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_ymax", &
+      !nspec2D_ymax_outer_core, adios_err)
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  nspec2D_zmin_outer_core = NSPEC2D_BOTTOM(IREGION_OUTER_CORE)
+
+  !local_dim = NSPEC2DMAX_XMIN_XMAX_OC
+  local_dim = size (ibelm_xmin_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_xmin/array", 0, 1, &
+    ibelm_xmin_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "ibelm_xmax/array", 0, 1, &
+    ibelm_xmax_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2DMAX_YMIN_YMAX_OC
+  local_dim = size (ibelm_ymin_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_ymin/array", 0, 1, &
+    ibelm_ymin_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "ibelm_ymax/array", 0, 1, &
+    ibelm_ymax_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2D_BOTTOM_OC
+  local_dim = size (ibelm_bottom_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_bottom/array", 0, 1, &
+    ibelm_bottom_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2D_TOP_OC
+  local_dim = size (ibelm_top_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_top/array", 0, 1, &
+    ibelm_top_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NDIM*NGLLY*NGLLZ*NSPEC2DMAX_XMIN_XMAX_OC
+  local_dim = size (normal_xmin_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "normal_xmin/array", 0, 1, &
+    normal_xmin_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "normal_xmax/array", 0, 1, &
+    normal_xmax_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NDIM*NGLLX*NGLLZ*NSPEC2DMAX_YMIN_YMAX_OC
+  local_dim = size (normal_ymin_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "normal_ymin/array", 0, 1, &
+    normal_ymin_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "normal_ymax/array", 0, 1, &
+    normal_ymax_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NDIM*NGLLX*NGLLY*NSPEC2D_BOTTOM_OC
+  local_dim = size (normal_bottom_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "normal_bottom/array", 0, 1, &
+    normal_bottom_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NDIM*NGLLX*NGLLY*NSPEC2D_TOP_OC
+  local_dim = size (normal_top_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "normal_top/array", 0, 1, &
+    normal_top_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NGLLY*NGLLZ*NSPEC2DMAX_XMIN_XMAX_OC
+  local_dim = size (jacobian2D_xmin_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_xmin/array", 0, 1, &
+    jacobian2D_xmin_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_xmax/array", 0, 1, &
+    jacobian2D_xmax_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NGLLX*NGLLZ*NSPEC2DMAX_YMIN_YMAX_OC
+  local_dim = size (jacobian2D_ymin_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_ymin/array", 0, 1, &
+    jacobian2D_ymin_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_ymax/array", 0, 1, &
+    jacobian2D_ymax_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+
+  !local_dim = NGLLX*NGLLY*NSPEC2D_BOTTOM_OC
+  local_dim = size (jacobian2D_bottom_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_bottom/array", 0, 1, &
+    jacobian2D_bottom_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NGLLX*NGLLY*NSPEC2D_TOP_OC
+  local_dim = size (jacobian2D_top_outer_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "jacobian2D_top/array", 0, 1, &
+    jacobian2D_top_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  ! boundary parameters
+
+  ! Close the ADIOS handle on the outer core boundary file.
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+
+  ! inner core
+
+  ! create name of database
+  call create_name_database_adios(prname, IREGION_INNER_CORE, LOCAL_PATH)
+  file_name= trim(prname) // "boundary.bp" 
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_xmin", 0, 1, &
+     nspec2D_xmin_inner_core, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_xmax", 0, 1, &
+     nspec2D_xmax_inner_core, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_ymin", 0, 1, &
+     nspec2D_ymin_inner_core, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec2D_ymax", 0, 1, &
+     nspec2D_ymax_inner_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_xmin", &
+      !nspec2D_xmin_inner_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_xmax", &
+      !nspec2D_xmax_inner_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_ymin", &
+      !nspec2D_ymin_inner_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec2D_ymax", &
+      !nspec2D_ymax_inner_core, adios_err)
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2DMAX_XMIN_XMAX_IC
+  local_dim = size (ibelm_xmin_inner_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_xmin/array", 0, 1, &
+    ibelm_xmin_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "ibelm_xmax/array", 0, 1, &
+    ibelm_xmax_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2DMAX_YMIN_YMAX_IC
+  local_dim = size (ibelm_ymin_inner_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_ymin/array", 0, 1, &
+    ibelm_ymin_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_schedule_read(adios_handle, sel, "ibelm_ymax/array", 0, 1, &
+    ibelm_ymax_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2D_BOTTOM_IC
+  local_dim = size (ibelm_bottom_inner_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_bottom/array", 0, 1, &
+    ibelm_bottom_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  !local_dim = NSPEC2D_TOP_IC
+  local_dim = size (ibelm_top_inner_core) 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "ibelm_top/array", 0, 1, &
+    ibelm_top_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! Close the ADIOS handle on the inner core boundary file.
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! -- Boundary Mesh for crust and mantle ---
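+  ! boundary_disc.bp holds the Moho, 400 km and 670 km discontinuity surfaces;
+  ! they are only needed to accumulate boundary kernels in adjoint runs
+  ! (SIMULATION_TYPE == 3)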
+  if (SAVE_BOUNDARY_MESH .and. SIMULATION_TYPE == 3) then
+    file_name = trim(LOCAL_PATH) // "/boundary_disc.bp"
+    call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+        "verbose=1", adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_selection_writeblock(sel, myrank)
+    call adios_schedule_read(adios_handle, sel, "NSPEC2D_MOHO", 0, 1, &
+       njunk1, adios_err)
+    call adios_schedule_read(adios_handle, sel, "NSPEC2D_400", 0, 1, &
+       njunk2, adios_err)
+    call adios_schedule_read(adios_handle, sel, "NSPEC2D_670", 0, 1, &
+       njunk3, adios_err)
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+    if (njunk1 /= NSPEC2D_MOHO .or. njunk2 /= NSPEC2D_400 .or. &
+        njunk3 /= NSPEC2D_670) &
+        call exit_mpi(myrank, 'Error reading boundary_disc.bp file')
+
+    local_dim = NSPEC2D_MOHO
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "ibelm_moho_top/array", 0, 1, &
+      ibelm_moho_bot, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "ibelm_moho_bot/array", 0, 1, &
+      ibelm_moho_top, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    local_dim = NSPEC2D_400
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "ibelm_400_top/array", 0, 1, &
+      ibelm_400_bot, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "ibelm_400_bot/array", 0, 1, &
+      ibelm_400_top, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    local_dim = NSPEC2D_670
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "ibelm_670_top/array", 0, 1, &
+      ibelm_670_bot, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "ibelm_670_bot/array", 0, 1, &
+      ibelm_670_top, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    local_dim = NDIM*NGLLX*NGLLY*NSPEC2D_MOHO
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "normal_moho/array", 0, 1, &
+      normal_moho, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    local_dim = NDIM*NGLLX*NGLLY*NSPEC2D_400
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "normal_400/array", 0, 1, &
+      normal_400, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    local_dim = NDIM*NGLLX*NGLLY*NSPEC2D_670
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "normal_670/array", 0, 1, &
+      normal_670, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    ! Close the ADIOS handle on the boundary_disc file.
+    call adios_selection_delete(sel)
+    call adios_read_close(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    k_top = 1
+    k_bot = NGLLZ
+
+    ! initialization
+    moho_kl = 0.; d400_kl = 0.; d670_kl = 0.; cmb_kl = 0.; icb_kl = 0.
+  endif
+
+end subroutine read_mesh_databases_coupling_adios
+
+subroutine read_mesh_databases_addressing_adios()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  include 'mpif.h'
+
+  ! local parameters
+  integer, dimension(NCHUNKS_VAL,0:NPROC_XI_VAL-1,0:NPROC_ETA_VAL-1) :: addressing
+  integer, dimension(0:NPROCTOT_VAL-1) :: ichunk_slice,iproc_xi_slice,iproc_eta_slice
+  integer :: ierr,iproc,iproc_read,iproc_xi,iproc_eta
+
+  ! open file with global slice number addressing
+  if(myrank == 0) then
+    open(unit=IIN,file=trim(OUTPUT_FILES)//'/addressing.txt',status='old',action='read',iostat=ierr)
+    if( ierr /= 0 ) call exit_mpi(myrank,'error opening addressing.txt')
+
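+    ! each line of addressing.txt lists: slice number, chunk index, iproc_xi, iproc_eta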
+    do iproc = 0,NPROCTOT_VAL-1
+      read(IIN,*) iproc_read,ichunk,iproc_xi,iproc_eta
+
+      if(iproc_read /= iproc) call exit_MPI(myrank,'incorrect slice number read')
+
+      addressing(ichunk,iproc_xi,iproc_eta) = iproc
+      ichunk_slice(iproc) = ichunk
+      iproc_xi_slice(iproc) = iproc_xi
+      iproc_eta_slice(iproc) = iproc_eta
+    enddo
+    close(IIN)
+  endif
+
+  ! broadcast the information read on the master to the nodes
+  call MPI_BCAST(addressing,NCHUNKS_VAL*NPROC_XI_VAL*NPROC_ETA_VAL,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
+  call MPI_BCAST(ichunk_slice,NPROCTOT_VAL,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
+  call MPI_BCAST(iproc_xi_slice,NPROCTOT_VAL,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
+  call MPI_BCAST(iproc_eta_slice,NPROCTOT_VAL,MPI_INTEGER,0,MPI_COMM_WORLD,ierr)
+
+  ! output a topology map of slices (TODO: derive the fixed '20x' spacing from the number of processes)
+  if (myrank == 0 ) then
+    if( NCHUNKS_VAL == 6 .and. NPROCTOT_VAL < 1000 ) then
+      write(IMAIN,*) 'Spatial distribution of the slices'
+      do iproc_xi = NPROC_XI_VAL-1, 0, -1
+        write(IMAIN,'(20x)',advance='no')
+        do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+          ichunk = CHUNK_AB
+          write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+        enddo
+        write(IMAIN,'(1x)',advance='yes')
+      enddo
+      write(IMAIN, *) ' '
+      do iproc_xi = NPROC_XI_VAL-1, 0, -1
+        write(IMAIN,'(1x)',advance='no')
+        do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+          ichunk = CHUNK_BC
+          write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+        enddo
+        write(IMAIN,'(3x)',advance='no')
+        do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+          ichunk = CHUNK_AC
+          write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+        enddo
+        write(IMAIN,'(3x)',advance='no')
+        do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+          ichunk = CHUNK_BC_ANTIPODE
+          write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+        enddo
+        write(IMAIN,'(1x)',advance='yes')
+      enddo
+      write(IMAIN, *) ' '
+      do iproc_xi = NPROC_XI_VAL-1, 0, -1
+        write(IMAIN,'(20x)',advance='no')
+        do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+          ichunk = CHUNK_AB_ANTIPODE
+          write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+        enddo
+        write(IMAIN,'(1x)',advance='yes')
+      enddo
+      write(IMAIN, *) ' '
+      do iproc_xi = NPROC_XI_VAL-1, 0, -1
+        write(IMAIN,'(20x)',advance='no')
+        do iproc_eta = NPROC_ETA_VAL -1, 0, -1
+          ichunk = CHUNK_AC_ANTIPODE
+          write(IMAIN,'(i5)',advance='no') addressing(ichunk,iproc_xi,iproc_eta)
+        enddo
+        write(IMAIN,'(1x)',advance='yes')
+      enddo
+      write(IMAIN, *) ' '
+    endif
+  endif
+
+  ! determine the chunk number of this slice from the addressing
+  ! (needed for Stacey absorbing conditions)
+  ichunk = ichunk_slice(myrank)
+
+end subroutine read_mesh_databases_addressing_adios
+
+
+!===============================================================================
+!> \brief Read crust mantle MPI arrays from an ADIOS file.
+subroutine read_mesh_databases_MPI_CM_adios()
+  ! External imports
+  use mpi
+  use adios_read_mod
+  ! Internal imports
+  use specfem_par
+  use specfem_par_crustmantle
+  implicit none
+
+  ! local parameters
+  integer :: sizeprocs, comm, ierr
+  character(len=150) :: file_name
+  integer(kind=8) :: group_size_inc
+  integer :: local_dim, global_dim, offset
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid, sel
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+  integer :: vars_count, attrs_count, current_step, last_step, vsteps
+  character(len=128), dimension(:), allocatable :: adios_names 
+  integer(kind=8), dimension(1) :: start, count
+
+  ! create the name for the database of the current slice and region
+  call create_name_database_adios(prname, IREGION_CRUST_MANTLE, LOCAL_PATH)
+
+  file_name= trim(prname) // "solver_data_mpi.bp" 
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! MPI interfaces
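+  ! (scalars such as num_interfaces are read through a per-rank writeblock
+  ! selection; array chunks use 1-D bounding-box selections, one contiguous
+  ! block per rank)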
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "num_interfaces", 0, 1, &
+     num_interfaces_crust_mantle, adios_err)
+  !call adios_get_scalar(adios_handle, "num_interfaces", &
+      !num_interfaces_crust_mantle, adios_err)
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  allocate(my_neighbours_crust_mantle(num_interfaces_crust_mantle), &
+          nibool_interfaces_crust_mantle(num_interfaces_crust_mantle), &
+          stat=ierr)
+  if( ierr /= 0 ) call exit_mpi(myrank, &
+      'error allocating array my_neighbours_crust_mantle etc.')
+
+  if( num_interfaces_crust_mantle > 0 ) then
+    call adios_selection_writeblock(sel, myrank)
+    !call adios_get_scalar(adios_handle, "max_nibool_interfaces", &
+      !max_nibool_interfaces_cm, adios_err)
+    call adios_schedule_read(adios_handle, sel, "max_nibool_interfaces", 0, 1, &
+       max_nibool_interfaces_cm, adios_err)
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    allocate(ibool_interfaces_crust_mantle(max_nibool_interfaces_cm, &
+        num_interfaces_crust_mantle), stat=ierr)
+    if( ierr /= 0 ) call exit_mpi(myrank, &
+        'error allocating array ibool_interfaces_crust_mantle')
+
+    local_dim = num_interfaces_crust_mantle
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "my_neighbours/array", 0, 1, &
+      my_neighbours_crust_mantle, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "nibool_interfaces/array", &
+      0, 1, nibool_interfaces_crust_mantle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    local_dim = max_nibool_interfaces_cm * num_interfaces_crust_mantle
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, &
+      "ibool_interfaces/array", 0, 1, &
+      ibool_interfaces_crust_mantle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  else
+    ! dummy array
+    max_nibool_interfaces_cm = 0
+    allocate(ibool_interfaces_crust_mantle(0,0),stat=ierr)
+    if( ierr /= 0 ) call exit_mpi(myrank, &
+        'error allocating array dummy ibool_interfaces_crust_mantle')
+  endif
+
+  ! inner / outer elements
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "nspec_inner", &
+    0, 1, nspec_inner_crust_mantle, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec_outer", &
+    0, 1, nspec_outer_crust_mantle, adios_err)
+  call adios_schedule_read(adios_handle, sel, "num_phase_ispec", &
+    0, 1, num_phase_ispec_crust_mantle, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec_inner", &
+      !nspec_inner_crust_mantle, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec_outer", &
+      !nspec_outer_crust_mantle, adios_err)
+  !call adios_get_scalar(adios_handle, "num_phase_ispec", &
+      !num_phase_ispec_crust_mantle, adios_err)
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  if( num_phase_ispec_crust_mantle < 0 ) &
+      call exit_mpi(myrank,'error num_phase_ispec_crust_mantle is < zero')
+
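+  ! phase_ispec_inner_crust_mantle lists the elements of the two update phases
+  ! (outer elements on the MPI interfaces, then inner elements), which lets the
+  ! solver overlap communication with computation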
+  allocate(phase_ispec_inner_crust_mantle(num_phase_ispec_crust_mantle,2),&
+          stat=ierr)
+  if( ierr /= 0 ) call exit_mpi(myrank, &
+      'error allocating array phase_ispec_inner_crust_mantle')
+
+  if(num_phase_ispec_crust_mantle > 0 ) then
+    local_dim = num_phase_ispec_crust_mantle * 2
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, &
+      "phase_ispec_inner/array", 0, 1, &
+      phase_ispec_inner_crust_mantle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  endif
+
+  ! mesh coloring for GPUs
+  if( USE_MESH_COLORING_GPU ) then
+    call adios_selection_writeblock(sel, myrank)
+    call adios_schedule_read(adios_handle, sel, "num_colors_outer", &
+      0, 1, num_colors_outer_crust_mantle, adios_err)
+    call adios_schedule_read(adios_handle, sel, "num_colors_inner", &
+      0, 1, num_colors_inner_crust_mantle, adios_err)
+    !call adios_get_scalar(adios_handle, "num_colors_outer", &
+        !num_colors_outer_crust_mantle, adios_err)
+    !call adios_get_scalar(adios_handle, "num_colors_inner", &
+        !num_colors_inner_crust_mantle, adios_err)
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+    ! colors
+
+    allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle +&
+        num_colors_inner_crust_mantle), stat=ierr)
+    if( ierr /= 0 ) &
+      call exit_mpi(myrank,'error allocating num_elem_colors_crust_mantle array')
+
+    local_dim = num_colors_outer_crust_mantle + num_colors_inner_crust_mantle 
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, &
+      "num_elem_colors/array", 0, 1, &
+      num_elem_colors_crust_mantle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  else
+    ! allocates dummy arrays
+    num_colors_outer_crust_mantle = 0
+    num_colors_inner_crust_mantle = 0
+    allocate(num_elem_colors_crust_mantle(num_colors_outer_crust_mantle + &
+        num_colors_inner_crust_mantle), stat=ierr)
+    if( ierr /= 0 ) &
+      call exit_mpi(myrank, &
+          'error allocating num_elem_colors_crust_mantle array')
+  endif
+  ! Close the ADIOS handle to the mesh database file.
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call MPI_Barrier(comm, ierr)
+
+end subroutine read_mesh_databases_MPI_CM_adios
+
+!===============================================================================
+!> \brief Read outer core MPI arrays from an ADIOS file.
+subroutine read_mesh_databases_MPI_OC_adios()
+  use mpi
+  use adios_read_mod
+  use specfem_par
+  use specfem_par_outercore
+  implicit none
+
+  ! local parameters
+  integer :: sizeprocs, comm, ierr
+  character(len=150) :: file_name
+  integer(kind=8) :: group_size_inc
+  integer :: local_dim, global_dim, offset
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid, sel
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+  integer :: vars_count, attrs_count, current_step, last_step, vsteps
+  character(len=128), dimension(:), allocatable :: adios_names 
+  integer(kind=8), dimension(1) :: start, count
+
+  ! create the name for the database of the current slice and region
+  call create_name_database_adios(prname, IREGION_OUTER_CORE, LOCAL_PATH)
+
+  file_name = trim(prname) // "solver_data_mpi.bp"
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! MPI interfaces
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "num_interfaces", &
+    0, 1, num_interfaces_outer_core, adios_err)
+  !call adios_get_scalar(adios_handle, "num_interfaces", &
+      !num_interfaces_outer_core, adios_err)
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  allocate(my_neighbours_outer_core(num_interfaces_outer_core), &
+          nibool_interfaces_outer_core(num_interfaces_outer_core), &
+          stat=ierr)
+  if( ierr /= 0 ) call exit_mpi(myrank, &
+      'error allocating array my_neighbours_outer_core etc.')
+
+  if( num_interfaces_outer_core > 0 ) then
+    call adios_selection_writeblock(sel, myrank)
+    call adios_schedule_read(adios_handle, sel, "max_nibool_interfaces", &
+      0, 1, max_nibool_interfaces_oc, adios_err)
+    !call adios_get_scalar(adios_handle, "max_nibool_interfaces", &
+      !max_nibool_interfaces_oc, adios_err)
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    allocate(ibool_interfaces_outer_core(max_nibool_interfaces_oc, &
+        num_interfaces_outer_core), stat=ierr)
+    if( ierr /= 0 ) call exit_mpi(myrank, &
+        'error allocating array ibool_interfaces_outer_core')
+
+    local_dim = num_interfaces_outer_core
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "my_neighbours/array", 0, 1, &
+      my_neighbours_outer_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "nibool_interfaces/array", &
+      0, 1, nibool_interfaces_outer_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    local_dim = max_nibool_interfaces_oc * num_interfaces_outer_core
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, &
+      "ibool_interfaces/array", 0, 1, &
+      ibool_interfaces_outer_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  else
+    ! dummy array
+    max_nibool_interfaces_oc = 0
+    allocate(ibool_interfaces_outer_core(0,0),stat=ierr)
+    if( ierr /= 0 ) call exit_mpi(myrank, &
+        'error allocating dummy array ibool_interfaces_outer_core')
+  endif
+
+  ! inner / outer elements
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "nspec_inner", &
+  0, 1, nspec_inner_outer_core, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec_outer", &
+    0, 1, nspec_outer_outer_core, adios_err)
+  call adios_schedule_read(adios_handle, sel, "num_phase_ispec", &
+    0, 1, num_phase_ispec_outer_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec_inner", &
+      !nspec_inner_outer_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec_outer", &
+      !nspec_outer_outer_core, adios_err)
+  !call adios_get_scalar(adios_handle, "num_phase_ispec", &
+      !num_phase_ispec_outer_core, adios_err)
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  if( num_phase_ispec_outer_core < 0 ) &
+      call exit_mpi(myrank,'error num_phase_ispec_outer_core is < zero')
+
+  allocate(phase_ispec_inner_outer_core(num_phase_ispec_outer_core,2),&
+          stat=ierr)
+  if( ierr /= 0 ) call exit_mpi(myrank, &
+      'error allocating array phase_ispec_inner_outer_core')
+
+  if( num_phase_ispec_outer_core > 0 ) then
+    local_dim = num_phase_ispec_outer_core * 2
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, &
+      "phase_ispec_inner/array", 0, 1, &
+      phase_ispec_inner_outer_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  endif
+
+  ! mesh coloring for GPUs
+  if( USE_MESH_COLORING_GPU ) then
+    call adios_selection_writeblock(sel, myrank)
+    call adios_schedule_read(adios_handle, sel, "num_colors_outer", &
+      0, 1, num_colors_outer_outer_core, adios_err)
+    call adios_schedule_read(adios_handle, sel, "num_colors_inner", &
+      0, 1, num_colors_inner_outer_core, adios_err)
+    !call adios_get_scalar(adios_handle, "num_colors_outer", &
+        !num_colors_outer_outer_core, adios_err)
+    !call adios_get_scalar(adios_handle, "num_colors_inner", &
+        !num_colors_inner_outer_core, adios_err)
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+    ! colors
+
+    allocate(num_elem_colors_outer_core(num_colors_outer_outer_core+&
+        num_colors_inner_outer_core), stat=ierr)
+    if( ierr /= 0 ) &
+      call exit_mpi(myrank,'error allocating num_elem_colors_outer_core array')
+
+    local_dim = num_colors_outer_outer_core+ num_colors_inner_outer_core
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, &
+      "num_elem_colors/array", 0, 1, &
+      num_elem_colors_outer_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  else
+    ! allocates dummy arrays
+    num_colors_outer_outer_core = 0
+    num_colors_inner_outer_core = 0
+    allocate(num_elem_colors_outer_core(num_colors_outer_outer_core+ &
+        num_colors_inner_outer_core), stat=ierr)
+    if( ierr /= 0 ) &
+      call exit_mpi(myrank, &
+          'error allocating num_elem_colors_outer_core array')
+  endif
+  ! Close the ADIOS handle to the mesh database file.
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call MPI_Barrier(comm, ierr)
+
+end subroutine read_mesh_databases_MPI_OC_adios
+
+
+!===============================================================================
+!> \brief Read inner core MPI arrays from an ADIOS file.
+subroutine read_mesh_databases_MPI_IC_adios()
+  use mpi
+  use adios_read_mod
+
+  use specfem_par
+  use specfem_par_innercore
+  implicit none
+
+  ! local parameters
+  integer :: sizeprocs, comm, ierr
+  character(len=150) :: file_name
+  integer(kind=8) :: group_size_inc
+  integer :: local_dim, global_dim, offset
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid, sel
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+  integer :: vars_count, attrs_count, current_step, last_step, vsteps
+  character(len=128), dimension(:), allocatable :: adios_names 
+  integer(kind=8), dimension(1) :: start, count
+
+  ! create the name for the database of the current slice and region
+  call create_name_database_adios(prname, IREGION_INNER_CORE, LOCAL_PATH)
+
+  file_name = trim(prname) // "solver_data_mpi.bp"
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! MPI interfaces
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "num_interfaces", &
+    0, 1, num_interfaces_inner_core, adios_err)
+  !call adios_get_scalar(adios_handle, "num_interfaces", &
+      !num_interfaces_inner_core, adios_err)
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  allocate(my_neighbours_inner_core(num_interfaces_inner_core), &
+          nibool_interfaces_inner_core(num_interfaces_inner_core), &
+          stat=ierr)
+  if( ierr /= 0 ) call exit_mpi(myrank, &
+      'error allocating array my_neighbours_inner_core etc.')
+
+  if( num_interfaces_inner_core > 0 ) then
+    call adios_selection_writeblock(sel, myrank)
+    call adios_schedule_read(adios_handle, sel, "max_nibool_interfaces", &
+      0, 1, max_nibool_interfaces_ic, adios_err)
+    !call adios_get_scalar(adios_handle, "max_nibool_interfaces", &
+      !max_nibool_interfaces_ic, adios_err)
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    allocate(ibool_interfaces_inner_core(max_nibool_interfaces_ic, &
+        num_interfaces_inner_core), stat=ierr)
+    if( ierr /= 0 ) call exit_mpi(myrank, &
+        'error allocating array ibool_interfaces_inner_core')
+
+    local_dim = num_interfaces_inner_core
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, "my_neighbours/array", 0, 1, &
+      my_neighbours_inner_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+    call adios_schedule_read(adios_handle, sel, "nibool_interfaces/array", &
+      0, 1, nibool_interfaces_inner_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    local_dim = max_nibool_interfaces_ic * num_interfaces_inner_core
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, &
+      "ibool_interfaces/array", 0, 1, &
+      ibool_interfaces_inner_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  else
+    ! dummy array
+    max_nibool_interfaces_ic = 0
+    allocate(ibool_interfaces_inner_core(0,0),stat=ierr)
+    if( ierr /= 0 ) call exit_mpi(myrank, &
+        'error allocating dummy array ibool_interfaces_inner_core')
+  endif
+
+  ! inner / outer elements
+  call adios_selection_writeblock(sel, myrank)
+  call adios_schedule_read(adios_handle, sel, "nspec_inner", &
+    0, 1, nspec_inner_inner_core, adios_err)
+  call adios_schedule_read(adios_handle, sel, "nspec_outer", &
+    0, 1, nspec_outer_inner_core, adios_err)
+  call adios_schedule_read(adios_handle, sel, "num_phase_ispec", &
+    0, 1, num_phase_ispec_inner_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec_inner", &
+      !nspec_inner_inner_core, adios_err)
+  !call adios_get_scalar(adios_handle, "nspec_outer", &
+      !nspec_outer_inner_core, adios_err)
+  !call adios_get_scalar(adios_handle, "num_phase_ispec", &
+      !num_phase_ispec_inner_core, adios_err)
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  if( num_phase_ispec_inner_core < 0 ) &
+      call exit_mpi(myrank,'error num_phase_ispec_inner_core is < zero')
+
+  allocate(phase_ispec_inner_inner_core(num_phase_ispec_inner_core,2),&
+          stat=ierr)
+  if( ierr /= 0 ) call exit_mpi(myrank, &
+      'error allocating array phase_ispec_inner_inner_core')
+
+  if(num_phase_ispec_inner_core > 0 ) then
+    local_dim = num_phase_ispec_inner_core * 2
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, &
+      "phase_ispec_inner/array", 0, 1, &
+      phase_ispec_inner_inner_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  endif
+
+  ! mesh coloring for GPUs
+  if( USE_MESH_COLORING_GPU ) then
+    call adios_selection_writeblock(sel, myrank)
+    call adios_schedule_read(adios_handle, sel, "num_colors_outer", &
+      0, 1, num_colors_outer_inner_core, adios_err)
+    call adios_schedule_read(adios_handle, sel, "num_colors_inner", &
+      0, 1, num_colors_inner_inner_core, adios_err)
+    !call adios_get_scalar(adios_handle, "num_colors_outer", &
+        !num_colors_outer_inner_core, adios_err)
+    !call adios_get_scalar(adios_handle, "num_colors_inner", &
+        !num_colors_inner_inner_core, adios_err)
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+    ! colors
+
+    allocate(num_elem_colors_inner_core(num_colors_outer_inner_core +&
+        num_colors_inner_inner_core), stat=ierr)
+    if( ierr /= 0 ) &
+      call exit_mpi(myrank,'error allocating num_elem_colors_inner_core array')
+
+    local_dim = num_colors_outer_inner_core + num_colors_inner_inner_core 
+    start(1) = local_dim*myrank; count(1) = local_dim
+    call adios_selection_boundingbox (sel , 1, start, count)
+    call adios_schedule_read(adios_handle, sel, &
+      "num_elem_colors/array", 0, 1, &
+      num_elem_colors_inner_core, adios_err)
+    call check_adios_err(myrank,adios_err)
+
+    call adios_perform_reads(adios_handle, adios_err)
+    call check_adios_err(myrank,adios_err)
+  else
+    ! allocates dummy arrays
+    num_colors_outer_inner_core = 0
+    num_colors_inner_inner_core = 0
+    allocate(num_elem_colors_inner_core(num_colors_outer_inner_core + &
+        num_colors_inner_inner_core), stat=ierr)
+    if( ierr /= 0 ) &
+      call exit_mpi(myrank, &
+          'error allocating num_elem_colors_inner_core array')
+  endif
+  ! Close the ADIOS handle to the mesh database file.
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call MPI_Barrier(comm, ierr)
+
+end subroutine read_mesh_databases_MPI_IC_adios
+
+
+!===============================================================================
+!> \brief Read Stacey BC arrays from an ADIOS file.
+subroutine read_mesh_databases_stacey_adios()
+
+  use mpi
+  use adios_read_mod
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  ! local parameters
+  integer :: ierr, comm, lnspec, lnglob, local_dim
+  ! processor identification
+  character(len=150) :: reg_name, file_name
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid, sel
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+  integer :: vars_count, attrs_count, current_step, last_step, vsteps
+  character(len=128), dimension(:), allocatable :: adios_names 
+  integer(kind=8), dimension(1) :: start, count
+
+  ! crust and mantle
+
+  ! create name of database
+  call create_name_database_adios(reg_name, IREGION_CRUST_MANTLE, LOCAL_PATH)
+
+  file_name = trim(reg_name) // "stacey.bp"
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+  ! read arrays for Stacey conditions
+
+  local_dim = 2*NSPEC2DMAX_XMIN_XMAX_CM 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "njmin/array", 0, 1, &
+      njmin_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "njmax/array", 0, 1, &
+      njmax_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "nkmin_xi/array", 0, 1, &
+      nkmin_xi_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = 2*NSPEC2DMAX_YMIN_YMAX_CM 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "nimin/array", 0, 1, &
+      nimin_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "nimax/array", 0, 1, &
+      nimax_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "nkmin_eta/array", 0, 1, &
+      nkmin_eta_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  ! outer core
+
+  ! create name of database
+  call create_name_database_adios(reg_name, IREGION_OUTER_CORE, LOCAL_PATH)
+
+  file_name = trim(reg_name) // "stacey.bp"
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+
+  call adios_read_init_method (ADIOS_READ_METHOD_BP, comm, &
+      "verbose=1", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_open_file (adios_handle, file_name, 0, comm, adios_err)
+  call check_adios_err(myrank,adios_err)
+  ! read arrays for Stacey conditions
+
+  local_dim = 2*NSPEC2DMAX_XMIN_XMAX_OC 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "njmin/array", 0, 1, &
+      njmin_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "njmax/array", 0, 1, &
+      njmax_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "nkmin_xi/array", 0, 1, &
+      nkmin_xi_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = 2*NSPEC2DMAX_YMIN_YMAX_OC 
+  start(1) = local_dim*myrank; count(1) = local_dim
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "nimin/array", 0, 1, &
+      nimin_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "nimax/array", 0, 1, &
+      nimax_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_selection_boundingbox (sel , 1, start, count)
+  call adios_schedule_read(adios_handle, sel, "nkmin_eta/array", 0, 1, &
+      nkmin_eta_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_perform_reads(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_selection_delete(sel)
+  call adios_read_close(adios_handle, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+end subroutine read_mesh_databases_stacey_adios
+
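The three read_mesh_databases_MPI_*_adios routines and the Stacey reader above all follow the same ADIOS read pattern: scalars that were written once per rank are fetched through a writeblock selection, and each rank's slice of a 1-D global array is fetched through a bounding-box selection whose offset is local_dim*myrank. The standalone sketch below condenses that pattern; it is illustrative only, assumes every rank wrote blocks of identical size, uses placeholder names (num_items, my_array) rather than any solver variable, and elides the check_adios_err calls for brevity.

  subroutine read_one_rank_slice_sketch(file_name, myrank, comm)
    use adios_read_mod
    implicit none
    character(len=*), intent(in) :: file_name
    integer, intent(in) :: myrank, comm
    integer :: num_items, local_dim, adios_err
    integer(kind=8) :: handle, sel
    integer(kind=8), dimension(1) :: start, count
    real, dimension(:), allocatable :: my_array

    call adios_read_init_method(ADIOS_READ_METHOD_BP, comm, "verbose=1", adios_err)
    call adios_read_open_file(handle, file_name, 0, comm, adios_err)

    ! per-rank scalar: one value per writer, read through this rank's write block
    call adios_selection_writeblock(sel, myrank)
    call adios_schedule_read(handle, sel, "num_items", 0, 1, num_items, adios_err)
    call adios_perform_reads(handle, adios_err)

    ! 1-D global array: every rank wrote local_dim entries,
    ! so this rank's slice starts at local_dim*myrank
    local_dim = num_items
    allocate(my_array(local_dim))
    start(1) = local_dim*myrank; count(1) = local_dim
    call adios_selection_boundingbox(sel, 1, start, count)
    call adios_schedule_read(handle, sel, "my_array/array", 0, 1, my_array, adios_err)
    call adios_perform_reads(handle, adios_err)

    call adios_selection_delete(sel)
    call adios_read_close(handle, adios_err)
    call adios_read_finalize_method(ADIOS_READ_METHOD_BP, adios_err)
  end subroutine read_one_rank_slice_sketch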

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_topography_bathymetry.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_topography_bathymetry.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/read_topography_bathymetry.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,85 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine read_topography_bathymetry()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+  implicit none
+
+  include 'mpif.h'
+
+  ! local parameters
+  integer :: ier
+  ! timing
+  double precision, external :: wtime
+
+  ! get MPI starting time
+  time_start = wtime()
+
+  ! make ellipticity
+  if( ELLIPTICITY_VAL ) then
+    ! splines used for locating exact source/receivers positions
+    ! in locate_sources() and locate_receivers() routines
+    call make_ellipticity(nspl,rspl,espl,espl2,ONE_CRUST)
+  endif
+
+  ! read topography and bathymetry file
+  if( TOPOGRAPHY ) then
+    ! allocates topography array
+    allocate(ibathy_topo(NX_BATHY,NY_BATHY),stat=ier)
+    if( ier /= 0 ) call exit_mpi(myrank,'error allocating ibathy_topo array')
+
+    ! initializes
+    ibathy_topo(:,:) = 0
+
+    ! master reads file
+    if(myrank == 0 ) then
+      ! user output
+      write(IMAIN,*) 'topography:'
+
+      ! reads topo file
+      call read_topo_bathy_database(ibathy_topo,LOCAL_PATH)
+    endif
+
+    ! broadcast the information read by the master to all other processes
+    call MPI_BCAST(ibathy_topo,NX_BATHY*NY_BATHY,MPI_INTEGER,0,MPI_COMM_WORLD,ier)
+  endif
+
+  ! user output
+  call sync_all()
+  if( myrank == 0 .and. (TOPOGRAPHY .or. OCEANS_VAL .or. ELLIPTICITY_VAL)) then
+    ! elapsed time since beginning of mesh generation
+    tCPU = wtime() - time_start
+    write(IMAIN,*)
+    write(IMAIN,*) 'Elapsed time for reading topo/bathy in seconds = ',sngl(tCPU)
+    write(IMAIN,*)
+  endif
+
+  end subroutine read_topography_bathymetry
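The routine above keeps all file access on the master: rank 0 allocates and fills ibathy_topo from disk while every other rank only allocates it, and a single MPI_BCAST then gives each process a full copy of the grid. A minimal standalone sketch of that pattern, with a hypothetical read_grid_from_file() standing in for read_topo_bathy_database():

  subroutine broadcast_grid_sketch(nx, ny, myrank)
    implicit none
    include 'mpif.h'
    integer, intent(in) :: nx, ny, myrank
    integer :: ier
    integer, dimension(:,:), allocatable :: grid

    allocate(grid(nx,ny), stat=ier)
    if (ier /= 0) stop 'error allocating grid'
    grid(:,:) = 0

    ! only the master process touches the file
    if (myrank == 0) call read_grid_from_file(grid)   ! hypothetical reader

    ! everyone receives the master's copy of the whole grid
    call MPI_BCAST(grid, nx*ny, MPI_INTEGER, 0, MPI_COMM_WORLD, ier)
  end subroutine broadcast_grid_sketch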

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/save_forward_arrays_adios.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/save_forward_arrays_adios.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/save_forward_arrays_adios.F90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,592 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+!-------------------------------------------------------------------------------
+!> \file save_forward_arrays_adios.F90
+!! \brief Save forward arrays with the help of the ADIOS library.
+!! \author MPBL
+!-------------------------------------------------------------------------------
+
+!-------------------------------------------------------------------------------
+!> \brief Write intermediate forward arrays in an ADIOS file.
+!! 
+!! This subroutine is only used when NUMBER_OF_RUNS > 1 and
+!! NUMBER_OF_THIS_RUN < NUMBER_OF_RUNS.
+subroutine save_intermediate_forward_arrays_adios()
+  ! External imports
+  use mpi
+  use adios_write_mod
+  ! Internal imports
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+  ! Local parameters
+  integer :: sizeprocs, comm, ierr
+  character(len=150) :: outputname
+  integer(kind=8) :: group_size_inc
+  integer :: local_dim, global_dim, offset
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+
+  outputname = trim(LOCAL_TMP_PATH) // "/dump_all_arrays_adios.bp" 
+  call world_size(sizeprocs) ! TODO keep it in parameters
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+  group_size_inc = 0
+  call adios_declare_group(adios_group, "SPECFEM3D_GLOBE_FORWARD_ARRAYS", &
+      "", 1, adios_err)
+!  call check_adios_err(myrank,adios_err)
+  call adios_select_method(adios_group, "MPI", "", "", adios_err)
+!  call check_adios_err(myrank,adios_err)
+
+  ! Define ADIOS variables
+  call define_common_forward_arrays_adios(adios_group, group_size_inc)
+  call define_rotation_forward_arrays_adios(adios_group, group_size_inc)
+  call define_attenuation_forward_arrays_adios(adios_group, group_size_inc)
+
+  ! Open an ADIOS handle to the restart file.
+  call adios_open (adios_handle, "SPECFEM3D_GLOBE_FORWARD_ARRAYS", &
+      outputname, "w", comm, adios_err);
+!  call check_adios_err(myrank,adios_err)
+  call adios_group_size (adios_handle, group_size_inc, &
+                         adios_totalsize, adios_err)
+!  call check_adios_err(myrank,adios_err)
+
+  ! Issue the order to write the previously defined variable to the ADIOS file
+  call write_common_forward_arrays_adios(adios_handle, sizeprocs)
+  call write_rotation_forward_arrays_adios(adios_handle, sizeprocs)
+  call write_attenuation_forward_arrays_adios(adios_handle, sizeprocs)
+  ! Reset the ADIOS path to its default empty value so that later writes are not prefixed with the last array's path.
+  call adios_set_path (adios_handle, "", adios_err)
+!  call check_adios_err(myrank,adios_err)
+
+  ! Close the ADIOS handle to the restart file.
+  call adios_close(adios_handle, adios_err)
+!  call check_adios_err(myrank,adios_err)
+end subroutine save_intermediate_forward_arrays_adios
+
+!-------------------------------------------------------------------------------
+!> \brief Write selected forward arrays in an ADIOS file.
+!! 
+!! This subroutine is only used for forward simulations when
+!! SAVE_FORWARD is set to .true. It dumps the same arrays as
+!! save_intermediate_forward_arrays_adios(), except that some arrays
+!! are only dumped if ROTATION and ATTENUATION are set to .true.
+subroutine save_forward_arrays_adios()
+  ! External imports
+  use mpi
+  use adios_write_mod
+  ! Internal imports
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+  ! Local parameters
+  integer :: sizeprocs, comm, ierr
+  character(len=150) :: outputname
+  integer(kind=8) :: group_size_inc
+  integer :: local_dim, global_dim, offset
+!  integer, parameter :: num_arrays = 9 ! TODO correct number 
+!  character(len=256), dimension(num_arrays) :: local_dims1, local_dims2, &
+!      global_dims1, global_dims2, offsets1, offsets2, array_name
+  ! ADIOS variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+
+  outputname = trim(LOCAL_TMP_PATH) // "/save_forward_arrays.bp" 
+  call world_size(sizeprocs)
+  call MPI_Comm_dup (MPI_COMM_WORLD, comm, ierr)
+  group_size_inc = 0
+
+  call adios_declare_group(adios_group, "SPECFEM3D_GLOBE_FORWARD_ARRAYS", &
+      "", 1, adios_err)
+!  call check_adios_err(myrank,adios_err)
+  call adios_select_method(adios_group, "MPI", "", "", adios_err)
+!  call check_adios_err(myrank,adios_err)
+
+  ! Define ADIOS variables
+  call define_common_forward_arrays_adios(adios_group, group_size_inc)
+  ! conditional definition of variables seems to mess up the group size,
+  ! even if the variables are only conditionally written.
+!  if (ROTATION_VAL) then
+    call define_rotation_forward_arrays_adios(adios_group, group_size_inc)
+!  endif
+!  if (ATTENUATION_VAL) then
+    call define_attenuation_forward_arrays_adios(adios_group, group_size_inc)
+!  endif
+
+  ! Open an ADIOS handle to the forward arrays file.
+  call adios_open (adios_handle, "SPECFEM3D_GLOBE_FORWARD_ARRAYS", &
+      outputname, "w", comm, adios_err);
+!  call check_adios_err(myrank,adios_err)
+  call adios_group_size (adios_handle, group_size_inc, &
+                         adios_totalsize, adios_err)
+!  call check_adios_err(myrank,adios_err)
+
+  ! Issue the order to write the previously defined variable to the ADIOS file
+  call write_common_forward_arrays_adios(adios_handle, sizeprocs)
+  if (ROTATION_VAL) then
+      call write_rotation_forward_arrays_adios(adios_handle, sizeprocs)
+  endif
+  if (ATTENUATION_VAL) then
+    call write_attenuation_forward_arrays_adios(adios_handle, sizeprocs)
+  endif
+  ! Reset the ADIOS path to its default empty value so that later writes are not prefixed with the last array's path.
+  call adios_set_path (adios_handle, "", adios_err)
+!  call check_adios_err(myrank,adios_err)
+
+  ! Close the ADIOS handle to the forward arrays file.
+  call adios_close(adios_handle, adios_err)
+!  call check_adios_err(myrank,adios_err)
+end subroutine save_forward_arrays_adios
+
+!-------------------------------------------------------------------------------
+!> Define ADIOS forward arrays that are always dumped.
+!! \param adios_group The adios group where the variables belong
+!! \param group_size_inc The inout adios group size to increment
+!!                       with the size of the variable
+subroutine define_common_forward_arrays_adios(adios_group, group_size_inc)
+  use adios_write_mod
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  integer(kind=8), intent(in) :: adios_group
+  integer(kind=8), intent(inout) :: group_size_inc
+
+  integer :: local_dim
+
+  local_dim = NDIM * NGLOB_CRUST_MANTLE 
+  call define_adios_global_real_1d_array(adios_group, "displ_crust_mantle", &
+      local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, "veloc_crust_mantle", &
+      local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, "accel_crust_mantle", &
+      local_dim, group_size_inc)
+
+  local_dim = NDIM * NGLOB_INNER_CORE
+  call define_adios_global_real_1d_array(adios_group, "displ_inner_core", &
+      local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, "veloc_inner_core", &
+      local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, "accel_inner_core", &
+      local_dim, group_size_inc)
+
+  local_dim = NGLOB_OUTER_CORE
+  call define_adios_global_real_1d_array(adios_group, "displ_outer_core", &
+      local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, "veloc_outer_core", &
+      local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, "accel_outer_core", &
+      local_dim, group_size_inc)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_CRUST_MANTLE_STR_OR_ATT
+  call define_adios_global_real_1d_array(adios_group, &
+      "epsilondev_xx_crust_mantle", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "epsilondev_yy_crust_mantle", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "epsilondev_xy_crust_mantle", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "epsilondev_xz_crust_mantle", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "epsilondev_yz_crust_mantle", local_dim, group_size_inc)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_INNER_CORE_STR_OR_ATT
+  call define_adios_global_real_1d_array(adios_group, &
+      "epsilondev_xx_inner_core", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "epsilondev_yy_inner_core", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "epsilondev_xy_inner_core", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "epsilondev_xz_inner_core", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "epsilondev_yz_inner_core", local_dim, group_size_inc)
+end subroutine define_common_forward_arrays_adios
+
+!-------------------------------------------------------------------------------
+!> Define ADIOS forward arrays that are dumped if ROTATION is true.
+!! \param adios_group The adios group where the variables belong
+!! \param group_size_inc The inout adios group size to increment
+!!                       with the size of the variable
+subroutine define_rotation_forward_arrays_adios(adios_group, group_size_inc)
+  use adios_write_mod
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  integer(kind=8), intent(in) :: adios_group
+  integer(kind=8), intent(inout) :: group_size_inc
+
+  integer :: local_dim
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_OUTER_CORE_ROTATION
+  call define_adios_global_real_1d_array(adios_group, &
+      "A_array_rotation", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "B_array_rotation", local_dim, group_size_inc)
+end subroutine define_rotation_forward_arrays_adios
+
+!-------------------------------------------------------------------------------
+!> Define ADIOS forward arrays that are dumped if ATTENUATION is true.
+!! \param adios_group The adios group where the variables belong
+!! \param group_size_inc The inout adios group size to increment
+!!                       with the size of the variable
+subroutine define_attenuation_forward_arrays_adios(adios_group, group_size_inc)
+  use adios_write_mod
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  integer(kind=8), intent(in) :: adios_group
+  integer(kind=8), intent(inout) :: group_size_inc
+
+  integer :: local_dim
+
+  local_dim = N_SLS*NGLLX*NGLLY*NGLLZ*NSPEC_CRUST_MANTLE_ATTENUAT
+  call define_adios_global_real_1d_array(adios_group, &
+      "R_xx_crust_mantle", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "R_yy_crust_mantle", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "R_xy_crust_mantle", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "R_xz_crust_mantle", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "R_yz_crust_mantle", local_dim, group_size_inc)
+
+  local_dim = N_SLS*NGLLX*NGLLY*NGLLZ*NSPEC_INNER_CORE_ATTENUATION
+  call define_adios_global_real_1d_array(adios_group, &
+      "R_xx_inner_core", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "R_yy_inner_core", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "R_xy_inner_core", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "R_xz_inner_core", local_dim, group_size_inc)
+  call define_adios_global_real_1d_array(adios_group, &
+      "R_yz_inner_core", local_dim, group_size_inc)
+end subroutine define_attenuation_forward_arrays_adios
+
+!-------------------------------------------------------------------------------
+!>  Schedule writes of ADIOS forward arrays that are always dumped.
+!! \param adios_handle The handle to the adios bp file
+!! \param sizeprocs The number of MPI processes involved in the writing
+subroutine write_common_forward_arrays_adios(adios_handle, sizeprocs)
+  use adios_write_mod
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  integer(kind=8), intent(in) :: adios_handle
+  integer, intent(in) :: sizeprocs 
+
+  integer :: local_dim, adios_err
+
+  local_dim = NDIM * NGLOB_CRUST_MANTLE 
+  call adios_set_path (adios_handle, "displ_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", displ_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "veloc_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", veloc_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "accel_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", accel_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NDIM * NGLOB_INNER_CORE
+  call adios_set_path (adios_handle, "displ_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", displ_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "veloc_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", veloc_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "accel_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", accel_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_set_path (adios_handle, "", adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLOB_OUTER_CORE
+  call adios_set_path (adios_handle, "displ_outer_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", displ_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "veloc_outer_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", veloc_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "accel_outer_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", accel_outer_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_CRUST_MANTLE_STR_OR_ATT
+  call adios_set_path (adios_handle, "epsilondev_xx_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", epsilondev_xx_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "epsilondev_yy_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", epsilondev_yy_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "epsilondev_xy_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", epsilondev_xy_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "epsilondev_xz_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", epsilondev_xz_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "epsilondev_yz_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", epsilondev_yz_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_INNER_CORE_STR_OR_ATT
+  call adios_set_path (adios_handle, "epsilondev_xx_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", epsilondev_xx_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "epsilondev_yy_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", epsilondev_yy_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "epsilondev_xy_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", epsilondev_xy_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "epsilondev_xz_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", epsilondev_xz_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "epsilondev_yz_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", epsilondev_yz_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+end subroutine write_common_forward_arrays_adios
+
+!-------------------------------------------------------------------------------
+!>  Schedule writes of ADIOS forward arrays that are dumped if ROTATION is true.
+!! \param adios_handle The handle to the adios bp file
+!! \param sizeprocs The number of MPI processes involved in the writing
+subroutine write_rotation_forward_arrays_adios(adios_handle, sizeprocs)
+  use adios_write_mod
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  integer(kind=8), intent(in) :: adios_handle
+  integer, intent(in) :: sizeprocs 
+
+  integer :: local_dim, adios_err
+
+  local_dim = NGLLX * NGLLY * NGLLZ * NSPEC_OUTER_CORE_ROTATION
+  call adios_set_path (adios_handle, "A_array_rotation", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", A_array_rotation, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "B_array_rotation", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", B_array_rotation, adios_err)
+  call check_adios_err(myrank,adios_err)
+end subroutine write_rotation_forward_arrays_adios
+
+!-------------------------------------------------------------------------------
+!>  Schedule writes of ADIOS forward arrays that are dumped if ATTENUATION 
+!!  is true.
+!! \param adios_handle The handle to the adios bp file
+!! \param sizeprocs The number of MPI processes involved in the writing
+subroutine write_attenuation_forward_arrays_adios(adios_handle, sizeprocs)
+  use adios_write_mod
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  integer(kind=8), intent(in) :: adios_handle
+  integer, intent(in) :: sizeprocs 
+
+  integer :: local_dim, adios_err
+
+  local_dim = N_SLS*NGLLX*NGLLY*NGLLZ*NSPEC_CRUST_MANTLE_ATTENUAT
+  call adios_set_path (adios_handle, "R_xx_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", R_xx_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "R_yy_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", R_yy_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "R_xy_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", R_xy_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "R_xz_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", R_xz_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "R_yz_crust_mantle", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", R_yz_crust_mantle, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  local_dim = N_SLS*NGLLX*NGLLY*NGLLZ*NSPEC_INNER_CORE_ATTENUATION
+  call adios_set_path (adios_handle, "R_xx_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", R_xx_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "R_yy_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", R_yy_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "R_xy_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", R_xy_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "R_xz_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", R_xz_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+
+  call adios_set_path (adios_handle, "R_yz_inner_core", adios_err)
+  call check_adios_err(myrank,adios_err)
+  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  call adios_write(adios_handle, "array", R_yz_inner_core, adios_err)
+  call check_adios_err(myrank,adios_err)
+end subroutine write_attenuation_forward_arrays_adios
+
+!-------------------------------------------------------------------------------
+!> Write local, global and offset dimensions to ADIOS 
+!! \param adios_handle Handle to the adios file
+!! \param local_dim Number of elements to be written by one process
+!! \param sizeprocs Number of MPI processes
+subroutine write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
+  use adios_write_mod
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+
+  implicit none
+
+  integer(kind=8), intent(in) :: adios_handle
+  integer, intent(in) :: sizeprocs, local_dim
+
+  integer :: adios_err
+
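+  ! Every rank writes local_dim entries of the same 1-D global array, so the
+  ! global size is local_dim*sizeprocs and this rank's slice starts at offset
+  ! local_dim*myrank (all ranks are assumed to write blocks of the same size).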
+  call adios_write(adios_handle, "local_dim", local_dim, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_write(adios_handle, "global_dim", local_dim*sizeprocs, adios_err)
+  call check_adios_err(myrank,adios_err)
+  call adios_write(adios_handle, "offset", local_dim*myrank, adios_err)
+  call check_adios_err(myrank,adios_err)
+end subroutine write_1D_global_array_adios_dims
+
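Every forward array in the file above is handled in two matched steps: at group-definition time define_adios_global_real_1d_array() reserves a 1-D global array of local_dim entries per rank and grows group_size_inc, and at write time the array's path is selected with adios_set_path(), its sizes are emitted by write_1D_global_array_adios_dims(), and the payload is written under the name "array". A sketch of what adding one more field would look like, where my_new_field_outer_core is a hypothetical array dimensioned NGLOB_OUTER_CORE and everything else reuses the helpers defined above:

  ! definition phase (inside one of the define_* routines)
  local_dim = NGLOB_OUTER_CORE
  call define_adios_global_real_1d_array(adios_group, "my_new_field_outer_core", &
      local_dim, group_size_inc)

  ! write phase (inside the matching write_* routine)
  call adios_set_path(adios_handle, "my_new_field_outer_core", adios_err)
  call check_adios_err(myrank, adios_err)
  call write_1D_global_array_adios_dims(adios_handle, local_dim, sizeprocs)
  call adios_write(adios_handle, "array", my_new_field_outer_core, adios_err)
  call check_adios_err(myrank, adios_err)
  call adios_set_path(adios_handle, "", adios_err)   ! reset the path prefix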

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_GLL_points.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_GLL_points.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/setup_GLL_points.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,57 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine setup_GLL_points()
+
+  use specfem_par
+  implicit none
+
+  ! local parameters
+  integer :: i,j
+
+  ! set up GLL points, weights and derivation matrices
+  call define_derivation_matrices(xigll,yigll,zigll,wxgll,wygll,wzgll, &
+                                 hprime_xx,hprime_yy,hprime_zz, &
+                                 hprimewgll_xx,hprimewgll_yy,hprimewgll_zz, &
+                                 wgllwgll_xy,wgllwgll_xz,wgllwgll_yz,wgll_cube)
+
+  if( USE_DEVILLE_PRODUCTS_VAL ) then
+
+  ! check that optimized routines from Deville et al. (2002) can be used
+    if(NGLLX /= 5 .or. NGLLY /= 5 .or. NGLLZ /= 5) &
+      stop 'Deville et al. (2002) routines can only be used if NGLLX = NGLLY = NGLLZ = 5'
+
+    ! define transpose of derivation matrix
+    do j = 1,NGLLY
+      do i = 1,NGLLX
+        hprime_xxT(j,i) = hprime_xx(i,j)
+        hprimewgll_xxT(j,i) = hprimewgll_xx(i,j)
+      enddo
+    enddo
+  endif
+
+  end subroutine setup_GLL_points
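setup_GLL_points() precomputes hprime_xxT and hprimewgll_xxT, the transposes of the 5x5 derivation matrices, presumably so that the optimized Deville et al. (2002) products can contract over the first array index of both operands, which in Fortran is the contiguous one. A generic illustration of that layout argument, not taken from the solver's actual loops, where mT(l,i) = m(i,l):

  ! out = matmul(m, field) written so the innermost loop is unit-stride
  ! in both mT and field (both are traversed down their first index)
  real :: m(5,5), mT(5,5), field(5,5), out(5,5), s
  integer :: i, j, l

  do j = 1, 5
    do i = 1, 5
      s = 0.0
      do l = 1, 5
        s = s + mT(l,i) * field(l,j)
      enddo
      out(i,j) = s
    enddo
  enddo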

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/specfem3D_par.F90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,753 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+!
+! United States and French Government Sponsorship Acknowledged.
+
+module constants_solver
+
+  include "constants.h"
+
+  ! include values created by the mesher
+  ! done for performance only using static allocation to allow for loop unrolling
+  include "OUTPUT_FILES/values_from_mesher.h"
+
+end module constants_solver
+
+!=====================================================================
+
+module specfem_par
+
+! main parameter module for specfem simulations
+
+  use constants_solver
+
+  implicit none
+
+  !-----------------------------------------------------------------
+  ! GLL points & weights
+  !-----------------------------------------------------------------
+
+  ! Gauss-Lobatto-Legendre points of integration and weights
+  double precision, dimension(NGLLX) :: xigll,wxgll
+  double precision, dimension(NGLLY) :: yigll,wygll
+  double precision, dimension(NGLLZ) :: zigll,wzgll
+
+  ! product of weights for gravity term
+  double precision, dimension(NGLLX,NGLLY,NGLLZ) :: wgll_cube
+
+  ! array with derivatives of Lagrange polynomials and precalculated products
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xx,hprimewgll_xx
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLX) :: hprime_xxT,hprimewgll_xxT
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLY) :: hprime_yy,hprimewgll_yy
+  real(kind=CUSTOM_REAL), dimension(NGLLZ,NGLLZ) :: hprime_zz,hprimewgll_zz
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY) :: wgllwgll_xy
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ) :: wgllwgll_xz
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ) :: wgllwgll_yz
+
+
+  !-----------------------------------------------------------------
+  ! attenuation parameters
+  !-----------------------------------------------------------------
+
+  ! memory variables and standard linear solids for attenuation
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: alphaval, betaval, gammaval
+  ! ADJOINT
+  real(kind=CUSTOM_REAL), dimension(N_SLS) :: b_alphaval, b_betaval, b_gammaval
+
+  ! attenuation: predictor
+  double precision, dimension(N_SLS) :: tau_sigma_dble
+
+  !-----------------------------------------------------------------
+  ! topography/bathymetry & oceans
+  !-----------------------------------------------------------------
+
+  ! use integer array to store values
+  integer, dimension(:,:),allocatable :: ibathy_topo
+
+  ! additional mass matrix for ocean load
+  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE_OCEANS) :: rmass_ocean_load
+
+  ! flag to mask ocean-bottom degrees of freedom for ocean load
+  logical, dimension(NGLOB_CRUST_MANTLE_OCEANS) :: updated_dof_ocean_load
+
+  integer :: npoin_oceans
+  integer, dimension(:),allocatable :: ibool_ocean_load
+  real(kind=CUSTOM_REAL), dimension(:,:),allocatable :: normal_ocean_load
+  real(kind=CUSTOM_REAL), dimension(:),allocatable :: rmass_ocean_load_selected
+
+  !-----------------------------------------------------------------
+  ! ellipticity
+  !-----------------------------------------------------------------
+
+  ! for ellipticity
+  integer :: nspl
+  double precision,dimension(NR) :: rspl,espl,espl2
+
+  !-----------------------------------------------------------------
+  ! rotation
+  !-----------------------------------------------------------------
+
+  ! non-dimensionalized rotation rate of the Earth times two
+  real(kind=CUSTOM_REAL) :: two_omega_earth
+
+  ! for the Euler scheme for rotation
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROTATION) :: &
+    A_array_rotation,B_array_rotation
+
+  !ADJOINT
+  real(kind=CUSTOM_REAL) b_two_omega_earth
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ROT_ADJOINT) :: &
+    b_A_array_rotation,b_B_array_rotation
+
+  !-----------------------------------------------------------------
+  ! gravity
+  !-----------------------------------------------------------------
+
+  ! lookup table every km for gravity
+  real(kind=CUSTOM_REAL) :: minus_g_cmb,minus_g_icb
+  double precision, dimension(NRAD_GRAVITY) :: minus_gravity_table, &
+    minus_deriv_gravity_table,density_table,d_ln_density_dr_table,minus_rho_g_over_kappa_fluid
+
+  !-----------------------------------------------------------------
+  ! sources
+  !-----------------------------------------------------------------
+
+  ! parameters for the source
+  integer :: NSOURCES,nsources_local
+  integer, dimension(:), allocatable :: islice_selected_source,ispec_selected_source
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: sourcearrays
+  double precision, dimension(:,:,:) ,allocatable:: nu_source
+
+  double precision, dimension(:), allocatable :: Mxx,Myy,Mzz,Mxy,Mxz,Myz
+  double precision, dimension(:), allocatable :: xi_source,eta_source,gamma_source
+  double precision, dimension(:), allocatable :: tshift_cmt,hdur,hdur_gaussian
+  double precision, dimension(:), allocatable :: theta_source,phi_source
+  double precision :: t0
+
+  !-----------------------------------------------------------------
+  ! receivers
+  !-----------------------------------------------------------------
+
+  ! receiver information
+  integer :: nrec,nrec_local
+  integer, dimension(:), allocatable :: islice_selected_rec,ispec_selected_rec
+  integer, dimension(:), allocatable :: number_receiver_global
+  double precision, dimension(:), allocatable :: xi_receiver,eta_receiver,gamma_receiver
+  double precision, dimension(:,:,:), allocatable :: nu
+  double precision, allocatable, dimension(:) :: stlat,stlon,stele,stbur
+  character(len=MAX_LENGTH_STATION_NAME), dimension(:), allocatable  :: station_name
+  character(len=MAX_LENGTH_NETWORK_NAME), dimension(:), allocatable :: network_name
+  character(len=150) :: STATIONS,rec_filename
+
+  ! Lagrange interpolators at receivers
+  double precision, dimension(:,:), allocatable :: hxir_store,hetar_store,hgammar_store
+
+  !ADJOINT
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:,:,:), allocatable :: adj_sourcearrays
+  integer :: nrec_simulation, nadj_rec_local
+  integer :: NSTEP_SUB_ADJ  ! to read input in chunks
+  integer, dimension(:,:), allocatable :: iadjsrc ! to read input in chunks
+  integer, dimension(:), allocatable :: iadjsrc_len,iadj_vec
+  ! source frechet derivatives
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: moment_der
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: sloc_der
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: stshift_der, shdur_der
+  double precision, dimension(:,:), allocatable :: hpxir_store,hpetar_store,hpgammar_store
+  integer :: nadj_hprec_local
+
+  !-----------------------------------------------------------------
+  ! seismograms
+  !-----------------------------------------------------------------
+
+  ! seismograms
+  integer :: it_begin,it_end,nit_written
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: seismograms
+  integer :: seismo_offset, seismo_current
+
+  ! for SAC headers for seismograms
+  integer :: yr_SAC,jda_SAC,ho_SAC,mi_SAC
+  double precision :: sec_SAC
+  real :: mb_SAC
+  double precision :: t_cmt_SAC,t_shift_SAC
+  double precision :: elat_SAC,elon_SAC,depth_SAC, &
+    cmt_lat_SAC,cmt_lon_SAC,cmt_depth_SAC,cmt_hdur_SAC
+  character(len=20) :: event_name_SAC
+
+  !-----------------------------------------------------------------
+  ! file parameters
+  !-----------------------------------------------------------------
+
+  ! parameters read from parameter file
+  double precision DT,ROCEAN,RMIDDLE_CRUST, &
+          RMOHO,R80,R220,R400,R600,R670,R771,RTOPDDOUBLEPRIME,RCMB,RICB, &
+          RHO_TOP_OC,RHO_BOTTOM_OC,RHO_OCEANS,HDUR_MOVIE, &
+          MOVIE_TOP,MOVIE_BOTTOM,MOVIE_WEST,MOVIE_EAST,MOVIE_NORTH,MOVIE_SOUTH, &
+          ANGULAR_WIDTH_XI_IN_DEGREES
+
+  integer MIN_ATTENUATION_PERIOD,MAX_ATTENUATION_PERIOD,NER_CRUST, &
+          NER_80_MOHO,NER_220_80,NER_400_220,NER_600_400,NER_670_600,NER_771_670, &
+          NER_TOPDDOUBLEPRIME_771,NER_CMB_TOPDDOUBLEPRIME,NER_OUTER_CORE, &
+          NER_TOP_CENTRAL_CUBE_ICB,NEX_XI,NEX_ETA, &
+          NTSTEP_BETWEEN_OUTPUT_SEISMOS,&
+          NTSTEP_BETWEEN_READ_ADJSRC,NSTEP,NTSTEP_BETWEEN_FRAMES, &
+          NTSTEP_BETWEEN_OUTPUT_INFO,NUMBER_OF_RUNS,NUMBER_OF_THIS_RUN,SIMULATION_TYPE, &
+          MOVIE_VOLUME_TYPE,MOVIE_START,MOVIE_STOP,NOISE_TOMOGRAPHY
+
+  logical ONE_CRUST,TOPOGRAPHY,MOVIE_SURFACE,MOVIE_VOLUME,MOVIE_COARSE, &
+          RECEIVERS_CAN_BE_BURIED,PRINT_SOURCE_TIME_FUNCTION, &
+          SAVE_MESH_FILES,ABSORBING_CONDITIONS,INCLUDE_CENTRAL_CUBE,SAVE_FORWARD, &
+          OUTPUT_SEISMOS_ASCII_TEXT,OUTPUT_SEISMOS_SAC_ALPHANUM,OUTPUT_SEISMOS_SAC_BINARY, &
+          ROTATE_SEISMOGRAMS_RT,HONOR_1D_SPHERICAL_MOHO,WRITE_SEISMOGRAMS_BY_MASTER,&
+          SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE
+
+  character(len=150) :: OUTPUT_FILES,LOCAL_PATH,LOCAL_TMP_PATH,MODEL
+  ! process/partition name
+  character(len=150) :: prname
+
+
+  !-----------------------------------------------------------------
+  ! mesh
+  !-----------------------------------------------------------------
+
+  ! this for all the regions
+  integer, dimension(MAX_NUM_REGIONS) :: NSPEC2D_BOTTOM,NSPEC2D_TOP
+
+  ! computed in read_compute_parameters
+  integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: ner,ratio_sampling_array
+  integer, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: doubling_index
+  double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: r_bottom,r_top
+  logical, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: this_region_has_a_doubling
+  double precision, dimension(MAX_NUMBER_OF_MESH_LAYERS) :: rmins,rmaxs
+
+  !-----------------------------------------------------------------
+  ! MPI partitions
+  !-----------------------------------------------------------------
+
+  ! proc numbers for MPI
+  integer :: myrank
+  integer :: ichunk ! needed for stacey boundaries
+
+  ! time loop timing
+  double precision :: time_start,tCPU
+
+  !-----------------------------------------------------------------
+  ! assembly
+  !-----------------------------------------------------------------
+
+  ! collected MPI interfaces
+  ! MPI crust/mantle mesh
+  integer :: num_interfaces_crust_mantle
+  integer :: max_nibool_interfaces_cm
+  integer, dimension(:), allocatable :: my_neighbours_crust_mantle,nibool_interfaces_crust_mantle
+  integer, dimension(:,:), allocatable :: ibool_interfaces_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_vector_crust_mantle,buffer_recv_vector_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: b_buffer_send_vector_cm,b_buffer_recv_vector_cm
+
+  integer, dimension(:), allocatable :: request_send_vector_cm,request_recv_vector_cm
+  integer, dimension(:), allocatable :: b_request_send_vector_cm,b_request_recv_vector_cm
+
+  ! MPI inner core mesh
+  integer :: num_interfaces_inner_core
+  integer :: max_nibool_interfaces_ic
+  integer, dimension(:), allocatable :: my_neighbours_inner_core,nibool_interfaces_inner_core
+  integer, dimension(:,:), allocatable :: ibool_interfaces_inner_core
+
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: buffer_send_vector_inner_core,buffer_recv_vector_inner_core
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: b_buffer_send_vector_inner_core,b_buffer_recv_vector_inner_core
+
+  integer, dimension(:), allocatable :: request_send_vector_ic,request_recv_vector_ic
+  integer, dimension(:), allocatable :: b_request_send_vector_ic,b_request_recv_vector_ic
+
+  ! MPI outer core mesh
+  integer :: num_interfaces_outer_core
+  integer :: max_nibool_interfaces_oc
+  integer, dimension(:), allocatable :: my_neighbours_outer_core,nibool_interfaces_outer_core
+  integer, dimension(:,:), allocatable :: ibool_interfaces_outer_core
+
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: buffer_send_scalar_outer_core,buffer_recv_scalar_outer_core
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: b_buffer_send_scalar_outer_core,b_buffer_recv_scalar_outer_core
+
+  integer, dimension(:), allocatable :: request_send_scalar_oc,request_recv_scalar_oc
+  integer, dimension(:), allocatable :: b_request_send_scalar_oc,b_request_recv_scalar_oc
+
+  !-----------------------------------------------------------------
+  ! gpu
+  !-----------------------------------------------------------------
+
+  ! CUDA mesh pointer<->integer wrapper
+  integer(kind=8) :: Mesh_pointer
+  logical :: GPU_MODE
+
+  !-----------------------------------------------------------------
+  ! ADIOS
+  !-----------------------------------------------------------------
+
+  logical :: ADIOS_ENABLED, ADIOS_FOR_FORWARD_ARRAYS, ADIOS_FOR_MPI_ARRAYS, &
+      ADIOS_FOR_ARRAYS_SOLVER, ADIOS_FOR_SOLVER_MESHFILES, ADIOS_FOR_AVS_DX
+
+  !-----------------------------------------------------------------
+  ! time scheme
+  !-----------------------------------------------------------------
+
+  integer :: it
+
+  ! Newmark time scheme parameters and non-dimensionalization
+  double precision :: scale_t,scale_t_inv,scale_displ,scale_veloc
+  real(kind=CUSTOM_REAL) :: deltat,deltatover2,deltatsqover2
+  ! ADJOINT
+  real(kind=CUSTOM_REAL) :: b_deltat,b_deltatover2,b_deltatsqover2
+
+#ifdef _HANDOPT
+  integer :: imodulo_NGLOB_CRUST_MANTLE,imodulo_NGLOB_CRUST_MANTLE4, &
+            imodulo_NGLOB_INNER_CORE,imodulo_NGLOB_OUTER_CORE
+#endif
+
+end module specfem_par
+
+
+!=====================================================================
+
+module specfem_par_crustmantle
+
+! parameter module for elastic solver in crust/mantle region
+
+  use constants_solver
+  implicit none
+
+  ! ----------------- crust, mantle and oceans ---------------------
+  ! mesh parameters
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: ibool_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE) :: &
+    xix_crust_mantle,xiy_crust_mantle,xiz_crust_mantle,&
+    etax_crust_mantle,etay_crust_mantle,etaz_crust_mantle, &
+    gammax_crust_mantle,gammay_crust_mantle,gammaz_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLOB_CRUST_MANTLE) :: &
+    xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle
+
+  ! arrays for isotropic elements stored only where needed to save space
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ISO_MANTLE) :: &
+    rhostore_crust_mantle,kappavstore_crust_mantle,muvstore_crust_mantle
+
+  ! arrays for anisotropic elements stored only where needed to save space
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_TISO_MANTLE) :: &
+    kappahstore_crust_mantle,muhstore_crust_mantle,eta_anisostore_crust_mantle
+
+  ! arrays for full anisotropy only when needed
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_MANTLE) :: &
+    c11store_crust_mantle,c12store_crust_mantle,c13store_crust_mantle, &
+    c14store_crust_mantle,c15store_crust_mantle,c16store_crust_mantle, &
+    c22store_crust_mantle,c23store_crust_mantle,c24store_crust_mantle, &
+    c25store_crust_mantle,c26store_crust_mantle,c33store_crust_mantle, &
+    c34store_crust_mantle,c35store_crust_mantle,c36store_crust_mantle, &
+    c44store_crust_mantle,c45store_crust_mantle,c46store_crust_mantle, &
+    c55store_crust_mantle,c56store_crust_mantle,c66store_crust_mantle
+
+  ! flag for transversely isotropic elements
+  logical, dimension(NSPEC_CRUST_MANTLE) :: ispec_is_tiso_crust_mantle
+
+  ! mass matrices
+  !
+  ! in the case of Stacey boundary conditions, a C*deltat/2 contribution is added to the mass matrix
+  ! on Stacey edges for the crust_mantle and outer_core regions, but not for the inner_core region;
+  ! the mass matrix must therefore be replaced by three mass matrices that include the "C" damping matrix
+  !
+  ! if ABSORBING_CONDITIONS is not set, or if NCHUNKS = 6, only one mass matrix is needed:
+  ! for the sake of performance only the "rmassz" array is then filled, and "rmassx" and "rmassy" are unused
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassx_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassy_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmassz_crust_mantle
+  integer :: NGLOB_XY_CM
+
+  ! displacement, velocity, acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE) :: &
+     displ_crust_mantle,veloc_crust_mantle,accel_crust_mantle
+
+  ! ADJOINT
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_CRUST_MANTLE_ADJOINT) :: &
+    b_displ_crust_mantle,b_veloc_crust_mantle,b_accel_crust_mantle
+
+  ! memory variables and standard linear solids for attenuation
+  real(kind=CUSTOM_REAL), dimension(ATT1,ATT2,ATT3,ATT4) :: &
+    one_minus_sum_beta_crust_mantle, factor_scale_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1,ATT2,ATT3,ATT4) :: &
+    factor_common_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ATTENUAT) :: &
+    R_xx_crust_mantle,R_yy_crust_mantle,R_xy_crust_mantle,R_xz_crust_mantle,R_yz_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_OR_ATT) :: &
+    epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+    epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STRAIN_ONLY) :: &
+    eps_trace_over_3_crust_mantle
+
+  ! ADJOINT
+  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STR_AND_ATT) :: &
+    b_R_xx_crust_mantle,b_R_yy_crust_mantle,b_R_xy_crust_mantle,b_R_xz_crust_mantle,b_R_yz_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+    b_epsilondev_xx_crust_mantle,b_epsilondev_yy_crust_mantle,b_epsilondev_xy_crust_mantle, &
+    b_epsilondev_xz_crust_mantle,b_epsilondev_yz_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+    b_eps_trace_over_3_crust_mantle
+
+  ! for crust/oceans coupling
+  integer, dimension(NSPEC2DMAX_XMIN_XMAX_CM) :: ibelm_xmin_crust_mantle,ibelm_xmax_crust_mantle
+  integer, dimension(NSPEC2DMAX_YMIN_YMAX_CM) :: ibelm_ymin_crust_mantle,ibelm_ymax_crust_mantle
+  integer, dimension(NSPEC2D_BOTTOM_CM) :: ibelm_bottom_crust_mantle
+  integer, dimension(NSPEC2D_TOP_CM) :: ibelm_top_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_CM) :: &
+    jacobian2D_bottom_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_CM) :: &
+    jacobian2D_top_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_CM) :: &
+    jacobian2D_xmin_crust_mantle,jacobian2D_xmax_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_CM) :: &
+    jacobian2D_ymin_crust_mantle,jacobian2D_ymax_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_CM) :: &
+    normal_xmin_crust_mantle,normal_xmax_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2DMAX_YMIN_YMAX_CM) :: &
+    normal_ymin_crust_mantle,normal_ymax_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_CM) :: &
+    normal_bottom_crust_mantle
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_CM) :: &
+    normal_top_crust_mantle
+
+  ! Stacey
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_STACEY) :: &
+    rho_vp_crust_mantle,rho_vs_crust_mantle
+  integer :: nspec2D_xmin_crust_mantle,nspec2D_xmax_crust_mantle, &
+            nspec2D_ymin_crust_mantle,nspec2D_ymax_crust_mantle
+  integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_CM) :: nimin_crust_mantle,nimax_crust_mantle,nkmin_eta_crust_mantle
+  integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_CM) :: njmin_crust_mantle,njmax_crust_mantle,nkmin_xi_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: absorb_xmin_crust_mantle, &
+    absorb_xmax_crust_mantle, absorb_ymin_crust_mantle, absorb_ymax_crust_mantle
+
+  integer :: reclen_xmin_crust_mantle, reclen_xmax_crust_mantle, &
+            reclen_ymin_crust_mantle,reclen_ymax_crust_mantle
+
+  ! kernels
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+    rho_kl_crust_mantle,beta_kl_crust_mantle, alpha_kl_crust_mantle, Sigma_kl_crust_mantle
+  ! For anisotropic kernels (see compute_kernels.f90 for a definition of the array)
+  real(kind=CUSTOM_REAL), dimension(21,NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_ADJOINT) :: &
+    cijkl_kl_crust_mantle
+  ! approximate Hessian
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:),allocatable :: hess_kl_crust_mantle
+
+  ! Boundary Mesh and Kernels
+  integer :: k_top,k_bot,iregion_code
+  integer, dimension(NSPEC2D_MOHO) :: ibelm_moho_top,ibelm_moho_bot
+  integer, dimension(NSPEC2D_400) :: ibelm_400_top,ibelm_400_bot
+  integer, dimension(NSPEC2D_670) :: ibelm_670_top,ibelm_670_bot
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_MOHO) :: normal_moho
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_400) :: normal_400
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_670) :: normal_670
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_MOHO) :: moho_kl, moho_kl_top, moho_kl_bot
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_400) :: d400_kl, d400_kl_top, d400_kl_bot
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_670) ::  d670_kl, d670_kl_top, d670_kl_bot
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_CMB) :: cmb_kl, cmb_kl_top, cmb_kl_bot
+
+  ! NOISE_TOMOGRAPHY
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:,:), allocatable :: noise_sourcearray
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
+    normal_x_noise,normal_y_noise,normal_z_noise, mask_noise
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:), allocatable :: noise_surface_movie
+  integer :: irec_master_noise
+  integer :: NSPEC_TOP
+
+  ! inner / outer elements crust/mantle region
+  integer :: num_phase_ispec_crust_mantle
+  integer :: nspec_inner_crust_mantle,nspec_outer_crust_mantle
+  integer, dimension(:,:), allocatable :: phase_ispec_inner_crust_mantle
+
+  ! mesh coloring
+  integer :: num_colors_outer_crust_mantle,num_colors_inner_crust_mantle
+  integer,dimension(:),allocatable :: num_elem_colors_crust_mantle
+
+end module specfem_par_crustmantle
+
+!=====================================================================
+
+module specfem_par_innercore
+
+! parameter module for elastic solver in inner core region
+
+  use constants_solver
+  implicit none
+
+  ! ----------------- inner core ---------------------
+  ! mesh parameters
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: ibool_inner_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
+    xix_inner_core,xiy_inner_core,xiz_inner_core,&
+    etax_inner_core,etay_inner_core,etaz_inner_core, &
+    gammax_inner_core,gammay_inner_core,gammaz_inner_core
+
+  ! material parameters
+  ! (note: muvstore also needed for attenuation in case of anisotropic inner core)
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE) :: &
+    rhostore_inner_core,kappavstore_inner_core,muvstore_inner_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLOB_INNER_CORE) :: &
+    xstore_inner_core,ystore_inner_core,zstore_inner_core
+
+  ! arrays for inner-core anisotropy only when needed
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPECMAX_ANISO_IC) :: &
+    c11store_inner_core,c33store_inner_core,c12store_inner_core, &
+    c13store_inner_core,c44store_inner_core
+
+  ! local to global mapping
+  integer, dimension(NSPEC_INNER_CORE) :: idoubling_inner_core
+
+  ! mass matrix
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_inner_core
+
+  ! displacement, velocity, acceleration
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE) :: &
+    displ_inner_core,veloc_inner_core,accel_inner_core
+  ! ADJOINT
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_INNER_CORE_ADJOINT) :: &
+    b_displ_inner_core,b_veloc_inner_core,b_accel_inner_core
+
+  ! memory variables and standard linear solids for attenuation
+  real(kind=CUSTOM_REAL), dimension(ATT1,ATT2,ATT3,ATT5) :: &
+    one_minus_sum_beta_inner_core, factor_scale_inner_core
+  real(kind=CUSTOM_REAL), dimension(N_SLS,ATT1,ATT2,ATT3,ATT5) :: &
+    factor_common_inner_core
+  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ATTENUATION) :: &
+    R_xx_inner_core,R_yy_inner_core,R_xy_inner_core,R_xz_inner_core,R_yz_inner_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_OR_ATT) :: &
+    epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+    epsilondev_xz_inner_core,epsilondev_yz_inner_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STRAIN_ONLY) :: &
+    eps_trace_over_3_inner_core
+
+  ! ADJOINT
+  real(kind=CUSTOM_REAL), dimension(N_SLS,NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_STR_AND_ATT) :: &
+    b_R_xx_inner_core,b_R_yy_inner_core,b_R_xy_inner_core,b_R_xz_inner_core,b_R_yz_inner_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
+    b_epsilondev_xx_inner_core,b_epsilondev_yy_inner_core,b_epsilondev_xy_inner_core, &
+    b_epsilondev_xz_inner_core,b_epsilondev_yz_inner_core
+
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
+    b_eps_trace_over_3_inner_core
+
+  ! coupling/boundary surfaces
+  integer :: nspec2D_xmin_inner_core,nspec2D_xmax_inner_core, &
+            nspec2D_ymin_inner_core,nspec2D_ymax_inner_core
+  integer, dimension(NSPEC2DMAX_XMIN_XMAX_IC) :: ibelm_xmin_inner_core,ibelm_xmax_inner_core
+  integer, dimension(NSPEC2DMAX_YMIN_YMAX_IC) :: ibelm_ymin_inner_core,ibelm_ymax_inner_core
+  integer, dimension(NSPEC2D_BOTTOM_IC) :: ibelm_bottom_inner_core
+  integer, dimension(NSPEC2D_TOP_IC) :: ibelm_top_inner_core
+
+  ! adjoint kernels
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_INNER_CORE_ADJOINT) :: &
+    rho_kl_inner_core,beta_kl_inner_core, alpha_kl_inner_core
+
+  ! Boundary Mesh and Kernels
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_ICB) :: icb_kl, icb_kl_top, icb_kl_bot
+  logical :: fluid_solid_boundary
+
+  ! inner / outer elements inner core region
+  integer :: num_phase_ispec_inner_core
+  integer :: nspec_inner_inner_core,nspec_outer_inner_core
+  integer, dimension(:,:), allocatable :: phase_ispec_inner_inner_core
+
+  ! mesh coloring
+  integer :: num_colors_outer_inner_core,num_colors_inner_inner_core
+  integer,dimension(:),allocatable :: num_elem_colors_inner_core
+
+end module specfem_par_innercore
+
+!=====================================================================
+
+module specfem_par_outercore
+
+! parameter module for acoustic solver in outer core region
+
+  use constants_solver
+  implicit none
+
+  ! ----------------- outer core ---------------------
+  ! mesh parameters
+  integer, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: ibool_outer_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
+    xix_outer_core,xiy_outer_core,xiz_outer_core,&
+    etax_outer_core,etay_outer_core,etaz_outer_core, &
+    gammax_outer_core,gammay_outer_core,gammaz_outer_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: &
+    xstore_outer_core,ystore_outer_core,zstore_outer_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE) :: &
+    rhostore_outer_core,kappavstore_outer_core
+
+  ! mass matrix
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: rmass_outer_core
+
+  ! velocity potential
+  real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE) :: &
+    displ_outer_core,veloc_outer_core,accel_outer_core
+
+  ! ADJOINT
+  real(kind=CUSTOM_REAL), dimension(NGLOB_OUTER_CORE_ADJOINT) :: &
+    b_displ_outer_core,b_veloc_outer_core,b_accel_outer_core
+
+  ! Stacey
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_STACEY) :: vp_outer_core
+  integer :: nspec2D_xmin_outer_core,nspec2D_xmax_outer_core, &
+            nspec2D_ymin_outer_core,nspec2D_ymax_outer_core, &
+            nspec2D_zmin_outer_core
+  integer, dimension(2,NSPEC2DMAX_YMIN_YMAX_OC) :: nimin_outer_core,nimax_outer_core,nkmin_eta_outer_core
+  integer, dimension(2,NSPEC2DMAX_XMIN_XMAX_OC) :: njmin_outer_core,njmax_outer_core,nkmin_xi_outer_core
+
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: absorb_xmin_outer_core, &
+     absorb_xmax_outer_core, absorb_ymin_outer_core, absorb_ymax_outer_core, &
+     absorb_zmin_outer_core
+
+  integer :: reclen_xmin_outer_core, reclen_xmax_outer_core, &
+            reclen_ymin_outer_core, reclen_ymax_outer_core
+  integer :: reclen_zmin
+
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLOB_OUTER_CORE_ADJOINT) :: &
+    vector_accel_outer_core,vector_displ_outer_core,b_vector_displ_outer_core
+
+  ! arrays to couple with the fluid regions by pointwise matching
+  integer, dimension(NSPEC2DMAX_XMIN_XMAX_OC) :: ibelm_xmin_outer_core,ibelm_xmax_outer_core
+  integer, dimension(NSPEC2DMAX_YMIN_YMAX_OC) :: ibelm_ymin_outer_core,ibelm_ymax_outer_core
+  integer, dimension(NSPEC2D_BOTTOM_OC) :: ibelm_bottom_outer_core
+  integer, dimension(NSPEC2D_TOP_OC) :: ibelm_top_outer_core
+
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_OC) :: normal_xmin_outer_core,normal_xmax_outer_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_OC) :: normal_ymin_outer_core,normal_ymax_outer_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: normal_bottom_outer_core
+  real(kind=CUSTOM_REAL), dimension(NDIM,NGLLX,NGLLY,NSPEC2D_TOP_OC) :: normal_top_outer_core
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_BOTTOM_OC) :: jacobian2D_bottom_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NSPEC2D_TOP_OC) :: jacobian2D_top_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLY,NGLLZ,NSPEC2DMAX_XMIN_XMAX_OC) :: jacobian2D_xmin_outer_core,jacobian2D_xmax_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLZ,NSPEC2DMAX_YMIN_YMAX_OC) :: jacobian2D_ymin_outer_core,jacobian2D_ymax_outer_core
+
+  ! adjoint kernels
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: &
+    rho_kl_outer_core,alpha_kl_outer_core
+
+  ! kernel runs
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: &
+    div_displ_outer_core
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_OUTER_CORE_ADJOINT) :: &
+    b_div_displ_outer_core
+
+  ! check for deviatoric kernel for outer core region
+  real(kind=CUSTOM_REAL), dimension(:,:,:,:),allocatable :: beta_kl_outer_core
+  integer :: nspec_beta_kl_outer_core
+  logical,parameter:: deviatoric_outercore = .false.
+
+  ! inner / outer elements outer core region
+  integer :: num_phase_ispec_outer_core
+  integer :: nspec_inner_outer_core,nspec_outer_outer_core
+  integer, dimension(:,:), allocatable :: phase_ispec_inner_outer_core
+
+  ! mesh coloring
+  integer :: num_colors_outer_outer_core,num_colors_inner_outer_core
+  integer,dimension(:),allocatable :: num_elem_colors_outer_core
+
+end module specfem_par_outercore
+
+
+!=====================================================================
+
+module specfem_par_movie
+
+! parameter module for movies/shakemovies
+
+  use constants_solver
+
+  implicit none
+
+  ! to save movie frames
+  integer :: nmovie_points,NIT
+
+  real(kind=CUSTOM_REAL), dimension(:), allocatable :: &
+      store_val_x,store_val_y,store_val_z, &
+      store_val_ux,store_val_uy,store_val_uz
+
+  real(kind=CUSTOM_REAL), dimension(:,:), allocatable :: &
+      store_val_x_all,store_val_y_all,store_val_z_all, &
+      store_val_ux_all,store_val_uy_all,store_val_uz_all
+
+  ! to save movie volume
+  double precision :: scalingval
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_3DMOVIE) :: &
+    muvstore_crust_mantle_3dmovie
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_3DMOVIE) :: &
+    Iepsilondev_xx_crust_mantle,Iepsilondev_yy_crust_mantle,Iepsilondev_xy_crust_mantle, &
+    Iepsilondev_xz_crust_mantle,Iepsilondev_yz_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_3DMOVIE) :: &
+    Ieps_trace_over_3_crust_mantle
+
+  real(kind=CUSTOM_REAL), dimension(:,:,:), allocatable :: nu_3dmovie
+
+  integer :: npoints_3dmovie,nspecel_3dmovie
+  integer, dimension(NGLOB_CRUST_MANTLE_3DMOVIE) :: num_ibool_3dmovie
+
+  logical, dimension(NGLLX,NGLLY,NGLLZ,NSPEC_CRUST_MANTLE_3DMOVIE) :: mask_3dmovie
+  logical, dimension(NGLOB_CRUST_MANTLE_3DMOVIE) :: mask_ibool
+
+  ! vtk run-time visualization
+#ifdef WITH_VTK
+  ! vtk window
+  logical, parameter :: VTK_MODE = .true.
+#else
+  logical, parameter :: VTK_MODE = .false.
+#endif
+  real,dimension(:),allocatable :: vtkdata
+  logical,dimension(:),allocatable :: vtkmask
+  ! for runs with multiple MPI processes, gather data arrays on the master
+  real,dimension(:),allocatable :: vtkdata_all
+  integer,dimension(:),allocatable :: vtkdata_points_all
+  integer,dimension(:),allocatable :: vtkdata_offset_all
+  integer :: vtkdata_numpoints_all
+  real :: vtkdata_source_x,vtkdata_source_y,vtkdata_source_z
+
+end module specfem_par_movie
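A hedged sketch of how the three crust/mantle mass matrices declared in specfem_par_crustmantle above might be sized and allocated, following the Stacey comment in that module. NCHUNKS_VAL is assumed to come from values_from_mesher.h, ier is a placeholder local integer, and the error message is illustrative only:

  ! illustration only, not part of this commit
  if( ABSORBING_CONDITIONS .and. NCHUNKS_VAL /= 6 ) then
    ! Stacey edges add a C*deltat/2 term, so three distinct matrices are needed
    NGLOB_XY_CM = NGLOB_CRUST_MANTLE
  else
    ! only rmassz is used; keep the x/y arrays at a dummy size
    NGLOB_XY_CM = 1
  endif
  allocate(rmassx_crust_mantle(NGLOB_XY_CM), &
           rmassy_crust_mantle(NGLOB_XY_CM), &
           rmassz_crust_mantle(NGLOB_CRUST_MANTLE),stat=ier)
  if( ier /= 0 ) call exit_MPI(myrank,'error allocating crust/mantle mass matrices')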

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/write_movie_output.f90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/write_movie_output.f90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/write_movie_output.f90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,292 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+  subroutine write_movie_output()
+
+  use specfem_par
+  use specfem_par_crustmantle
+  use specfem_par_innercore
+  use specfem_par_outercore
+  use specfem_par_movie
+  implicit none
+
+  ! local parameters
+  ! debugging
+  character(len=256) :: filename
+  integer,dimension(:),allocatable :: dummy_i
+
+  logical, parameter :: DEBUG_SNAPSHOT = .false.
+
+  logical, parameter :: RUN_EXTERNAL_SCRIPT = .true.
+  character(len=256) :: script_name = "tar_databases_file.sh"
+  character(len=256) :: system_command
+
+  ! save movie on surface
+  if( MOVIE_SURFACE ) then
+    if( mod(it,NTSTEP_BETWEEN_FRAMES) == 0) then
+
+      ! gets resulting array values onto CPU
+      if( GPU_MODE ) then
+        ! transfers whole fields
+        call transfer_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,Mesh_pointer)
+        call transfer_veloc_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,veloc_crust_mantle,Mesh_pointer)
+      endif
+
+      ! save velocity here to avoid static offset on displacement for movies
+      call write_movie_surface()
+
+    endif
+  endif
+
+
+  ! save movie in full 3D mesh
+  if(MOVIE_VOLUME ) then
+
+    ! updates integral of strain for adjoint movie volume
+    if( MOVIE_VOLUME_TYPE == 2 .or. MOVIE_VOLUME_TYPE == 3 ) then
+      ! transfers strain arrays onto CPU
+      if( GPU_MODE ) then
+        call transfer_strain_cm_from_device(Mesh_pointer,eps_trace_over_3_crust_mantle, &
+                                         epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle, &
+                                         epsilondev_xy_crust_mantle,epsilondev_xz_crust_mantle, &
+                                         epsilondev_yz_crust_mantle)
+      endif
+
+      ! integrates strain
+      call movie_volume_integrate_strain(deltat,size(Ieps_trace_over_3_crust_mantle,4), &
+                                        eps_trace_over_3_crust_mantle, &
+                                        epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle, &
+                                        epsilondev_xy_crust_mantle,epsilondev_xz_crust_mantle, &
+                                        epsilondev_yz_crust_mantle, &
+                                        Ieps_trace_over_3_crust_mantle, &
+                                        Iepsilondev_xx_crust_mantle,Iepsilondev_yy_crust_mantle, &
+                                        Iepsilondev_xy_crust_mantle,Iepsilondev_xz_crust_mantle, &
+                                        Iepsilondev_yz_crust_mantle)
+    endif
+
+    ! file output
+    if( mod(it-MOVIE_START,NTSTEP_BETWEEN_FRAMES) == 0  &
+      .and. it >= MOVIE_START .and. it <= MOVIE_STOP) then
+
+      select case( MOVIE_VOLUME_TYPE )
+      case( 1 )
+        ! output strains
+
+        ! gets resulting array values onto CPU
+        if( GPU_MODE ) then
+          call transfer_strain_cm_from_device(Mesh_pointer, &
+                                eps_trace_over_3_crust_mantle, &
+                                epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle, &
+                                epsilondev_xy_crust_mantle,epsilondev_xz_crust_mantle, &
+                                epsilondev_yz_crust_mantle)
+        endif
+
+        call  write_movie_volume_strains(myrank,npoints_3dmovie, &
+                    LOCAL_TMP_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE, &
+                    it,eps_trace_over_3_crust_mantle, &
+                    epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+                    epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+                    muvstore_crust_mantle_3dmovie, &
+                    mask_3dmovie,nu_3dmovie)
+
+      case( 2, 3 )
+        ! output the Time Integral of Strain, or \mu*TIS
+        call  write_movie_volume_strains(myrank,npoints_3dmovie, &
+                    LOCAL_TMP_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE, &
+                    it,Ieps_trace_over_3_crust_mantle, &
+                    Iepsilondev_xx_crust_mantle,Iepsilondev_yy_crust_mantle,Iepsilondev_xy_crust_mantle, &
+                    Iepsilondev_xz_crust_mantle,Iepsilondev_yz_crust_mantle, &
+                    muvstore_crust_mantle_3dmovie, &
+                    mask_3dmovie,nu_3dmovie)
+
+      case( 4 )
+        ! output divergence and curl in whole volume
+
+        ! gets resulting array values onto CPU
+        if( GPU_MODE ) then
+          ! strains
+          call transfer_strain_cm_from_device(Mesh_pointer, &
+                                eps_trace_over_3_crust_mantle, &
+                                epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle, &
+                                epsilondev_xy_crust_mantle,epsilondev_xz_crust_mantle, &
+                                epsilondev_yz_crust_mantle)
+          call transfer_strain_ic_from_device(Mesh_pointer, &
+                                eps_trace_over_3_inner_core, &
+                                epsilondev_xx_inner_core,epsilondev_yy_inner_core, &
+                                epsilondev_xy_inner_core,epsilondev_xz_inner_core, &
+                                epsilondev_yz_inner_core)
+          ! wavefields
+          call transfer_fields_oc_from_device(NGLOB_OUTER_CORE, &
+                                displ_outer_core,veloc_outer_core,accel_outer_core,Mesh_pointer)
+        endif
+
+        call write_movie_volume_divcurl(myrank,it,eps_trace_over_3_crust_mantle,&
+                        div_displ_outer_core, &
+                        accel_outer_core,kappavstore_outer_core,rhostore_outer_core,ibool_outer_core, &
+                        eps_trace_over_3_inner_core, &
+                        epsilondev_xx_crust_mantle,epsilondev_yy_crust_mantle,epsilondev_xy_crust_mantle, &
+                        epsilondev_xz_crust_mantle,epsilondev_yz_crust_mantle, &
+                        epsilondev_xx_inner_core,epsilondev_yy_inner_core,epsilondev_xy_inner_core, &
+                        epsilondev_xz_inner_core,epsilondev_yz_inner_core, &
+                        LOCAL_TMP_PATH)
+
+      case( 5 )
+        !output displacement
+        if( GPU_MODE ) then
+          call transfer_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,Mesh_pointer)
+        endif
+
+        scalingval = scale_displ
+        call write_movie_volume_vector(myrank,it,npoints_3dmovie, &
+                                       LOCAL_TMP_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE,ibool_crust_mantle, &
+                                       displ_crust_mantle, &
+                                       scalingval,mask_3dmovie,nu_3dmovie)
+
+      case( 6 )
+        !output velocity
+        if( GPU_MODE ) then
+          call transfer_veloc_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,veloc_crust_mantle,Mesh_pointer)
+        endif
+
+        scalingval = scale_veloc
+        call write_movie_volume_vector(myrank,it,npoints_3dmovie, &
+                                       LOCAL_TMP_PATH,MOVIE_VOLUME_TYPE,MOVIE_COARSE,ibool_crust_mantle, &
+                                       veloc_crust_mantle, &
+                                       scalingval,mask_3dmovie,nu_3dmovie)
+
+      case( 7 )
+        ! output norm of displacement
+
+        ! gets resulting array values onto CPU
+        if( GPU_MODE ) then
+          ! displacement wavefields
+          call transfer_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,Mesh_pointer)
+          call transfer_displ_ic_from_device(NDIM*NGLOB_INNER_CORE,displ_inner_core,Mesh_pointer)
+          call transfer_displ_oc_from_device(NGLOB_OUTER_CORE,displ_outer_core,Mesh_pointer)
+        endif
+
+        call write_movie_volume_displnorm(myrank,it,LOCAL_TMP_PATH, &
+                        displ_crust_mantle,displ_inner_core,displ_outer_core, &
+                        ibool_crust_mantle,ibool_inner_core,ibool_outer_core)
+
+      case( 8 )
+        ! output norm of velocity
+
+        ! gets resulting array values onto CPU
+        if( GPU_MODE ) then
+          ! velocity wavefields
+          call transfer_veloc_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,veloc_crust_mantle,Mesh_pointer)
+          call transfer_veloc_ic_from_device(NDIM*NGLOB_INNER_CORE,veloc_inner_core,Mesh_pointer)
+          call transfer_veloc_oc_from_device(NGLOB_OUTER_CORE,veloc_outer_core,Mesh_pointer)
+        endif
+
+        call write_movie_volume_velnorm(myrank,it,LOCAL_TMP_PATH, &
+                        veloc_crust_mantle,veloc_inner_core,veloc_outer_core, &
+                        ibool_crust_mantle,ibool_inner_core,ibool_outer_core)
+
+      case( 9 )
+        ! output norm of acceleration
+
+        ! gets resulting array values onto CPU
+        if( GPU_MODE ) then
+          ! acceleration wavefields
+          call transfer_accel_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,accel_crust_mantle,Mesh_pointer)
+          call transfer_accel_ic_from_device(NDIM*NGLOB_INNER_CORE,accel_inner_core,Mesh_pointer)
+          call transfer_accel_oc_from_device(NGLOB_OUTER_CORE,accel_outer_core,Mesh_pointer)
+        endif
+
+        call write_movie_volume_accelnorm(myrank,it,LOCAL_TMP_PATH, &
+                        accel_crust_mantle,accel_inner_core,accel_outer_core, &
+                        ibool_crust_mantle,ibool_inner_core,ibool_outer_core)
+
+      case default
+        call exit_MPI(myrank, 'MOVIE_VOLUME_TYPE has to be in range from 1 to 9')
+
+      end select ! MOVIE_VOLUME_TYPE
+
+      ! executes an external script on the node
+      if( RUN_EXTERNAL_SCRIPT ) then
+        call sync_all()
+        if( myrank == 0 ) then
+          write(system_command,"('./',a,1x,i6.6,' >& out.',i6.6,'.log &')") trim(script_name),it,it
+          !print*,trim(system_command)
+          call system(system_command)
+        endif
+      endif
+    endif
+  endif ! MOVIE_VOLUME
+
+  ! debugging
+  if( DEBUG_SNAPSHOT ) then
+    if( mod(it-MOVIE_START,NTSTEP_BETWEEN_FRAMES) == 0  &
+      .and. it >= MOVIE_START .and. it <= MOVIE_STOP) then
+
+      !output displacement
+      if( GPU_MODE ) then
+        call transfer_displ_cm_from_device(NDIM*NGLOB_CRUST_MANTLE,displ_crust_mantle,Mesh_pointer)
+        call transfer_displ_ic_from_device(NDIM*NGLOB_INNER_CORE,displ_inner_core,Mesh_pointer)
+      endif
+
+      ! VTK file output
+      ! displacement values
+
+      ! crust mantle
+      allocate(dummy_i(NSPEC_CRUST_MANTLE))
+      dummy_i(:) = IFLAG_CRUST
+      ! one file per process
+      write(prname,'(a,i6.6,a)') 'OUTPUT_FILES/snapshot_proc',myrank,'_'
+      write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_1_displ_',it
+      call write_VTK_data_cr(dummy_i,NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
+                          xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle, &
+                          displ_crust_mantle,filename)
+      ! single file for all
+      !write(prname,'(a)') 'OUTPUT_FILES/snapshot_all_'
+      !write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_1_displ_',it
+      !call write_VTK_data_cr_all(myrank,NPROCTOT_VAL,dummy_i, &
+      !                    NSPEC_CRUST_MANTLE,NGLOB_CRUST_MANTLE, &
+      !                    xstore_crust_mantle,ystore_crust_mantle,zstore_crust_mantle,ibool_crust_mantle, &
+      !                    displ_crust_mantle,filename)
+      deallocate(dummy_i)
+
+      ! inner core
+      ! one file per process
+      !write(prname,'(a,i6.6,a)') trim(LOCAL_TMP_PATH)//'/'//'proc',myrank,'_'
+      !write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_3_displ_',it
+      !call write_VTK_data_cr(idoubling_inner_core,NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+      !                    xstore_inner_core,ystore_inner_core,zstore_inner_core,ibool_inner_core, &
+      !                    displ_inner_core,filename)
+      ! single file for all
+      !write(prname,'(a)') 'OUTPUT_FILES/snapshot_all_'
+      !write(filename,'(a,a,i6.6)') prname(1:len_trim(prname)),'reg_3_displ_',it
+      !call write_VTK_data_cr_all(myrank,NPROCTOT_VAL,idoubling_inner_core, &
+      !                    NSPEC_INNER_CORE,NGLOB_INNER_CORE, &
+      !                    xstore_inner_core,ystore_inner_core,zstore_inner_core,ibool_inner_core, &
+      !                    displ_inner_core,filename)
+    endif
+  endif
+
+  end subroutine write_movie_output
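One portability note on the RUN_EXTERNAL_SCRIPT block above: call system() is a common compiler extension rather than standard Fortran. A hedged alternative, if the build can assume a Fortran 2008 compiler, is the standard intrinsic shown below; this is a suggestion only, not a change made in this commit:

  ! launch the tar script asynchronously with the F2008 intrinsic
  call execute_command_line(trim(system_command), wait=.false.)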

Added: seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/write_specfem_adios_header.F90
===================================================================
--- seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/write_specfem_adios_header.F90	                        (rev 0)
+++ seismo/3D/SPECFEM3D_GLOBE/trunk/src/specfem3D/write_specfem_adios_header.F90	2013-07-01 01:39:52 UTC (rev 22470)
@@ -0,0 +1,761 @@
+!=====================================================================
+!
+!          S p e c f e m 3 D  G l o b e  V e r s i o n  5 . 1
+!          --------------------------------------------------
+!
+!          Main authors: Dimitri Komatitsch and Jeroen Tromp
+!                        Princeton University, USA
+!             and University of Pau / CNRS / INRIA, France
+! (c) Princeton University / California Institute of Technology and University of Pau / CNRS / INRIA
+!                            April 2011
+!
+! This program is free software; you can redistribute it and/or modify
+! it under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 2 of the License, or
+! (at your option) any later version.
+!
+! This program is distributed in the hope that it will be useful,
+! but WITHOUT ANY WARRANTY; without even the implied warranty of
+! MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+! GNU General Public License for more details.
+!
+! You should have received a copy of the GNU General Public License along
+! with this program; if not, write to the Free Software Foundation, Inc.,
+! 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+!
+!=====================================================================
+
+
+!> \file write_specfem_adios_header.F90
+!! \brief Write to the ADIOS file a group with all the parameters that ensure
+!!        reproducibility
+
+#include "config.fh"
+
+!
+!-------------------------------------------------------------------------------
+!
+
+!> @brief Write simulation parameters into ADIOS result file header.
+!!
+!! Write the ADIOS header containing values that ensure reproducibility of
+!! the simulation. These values come from the following files:
+!! DATA/Par_file, DATA/CMTSOLUTION, DATA/STATIONS
+subroutine write_specfem_header_adios()
+  use mpi
+  use adios_write_mod
+  use specfem_par, only : myrank, NSOURCES
+
+  implicit none
+  include "constants.h"
+
+  !-------------------------------------------------------------------
+  ! local parameters
+  !-------------------------------------------------------------------
+  ! parameters read from parameter file (cf. DATA/Par_file)
+  integer  :: NTSTEP_BETWEEN_OUTPUT_SEISMOS,NTSTEP_BETWEEN_READ_ADJSRC,        &
+      NTSTEP_BETWEEN_FRAMES, NTSTEP_BETWEEN_OUTPUT_INFO,NUMBER_OF_RUNS,        &
+      NUMBER_OF_THIS_RUN,NCHUNKS,SIMULATION_TYPE, MOVIE_VOLUME_TYPE,           &
+      MOVIE_START,MOVIE_STOP, NEX_XI,NEX_ETA,NPROC_XI,NPROC_ETA,               &
+      NOISE_TOMOGRAPHY
+
+  double precision :: ANGULAR_WIDTH_XI_IN_DEGREES,ANGULAR_WIDTH_ETA_IN_DEGREES,&
+      CENTER_LONGITUDE_IN_DEGREES,CENTER_LATITUDE_IN_DEGREES,                  &
+      GAMMA_ROTATION_AZIMUTH, HDUR_MOVIE,MOVIE_TOP_KM,MOVIE_BOTTOM_KM,         &
+      MOVIE_EAST_DEG,MOVIE_WEST_DEG,MOVIE_NORTH_DEG,MOVIE_SOUTH_DEG,           &
+      RECORD_LENGTH_IN_MINUTES
+
+  logical :: ELLIPTICITY,GRAVITY,ROTATION,TOPOGRAPHY,OCEANS, MOVIE_SURFACE,    &
+      MOVIE_VOLUME,MOVIE_COARSE, RECEIVERS_CAN_BE_BURIED,                      &
+      PRINT_SOURCE_TIME_FUNCTION, SAVE_MESH_FILES,ATTENUATION,ATTENUATION_NEW, &
+      ABSORBING_CONDITIONS,SAVE_FORWARD, OUTPUT_SEISMOS_ASCII_TEXT,            &
+      OUTPUT_SEISMOS_SAC_ALPHANUM,OUTPUT_SEISMOS_SAC_BINARY,                   &
+      ROTATE_SEISMOGRAMS_RT,WRITE_SEISMOGRAMS_BY_MASTER,                       &
+      SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE
+
+  ! values from CMTSOLUTION -------------------------------
+  ! integer          :: NSOURCES -> in specfem_par module
+  integer,           dimension(NSOURCES) :: yr, mo, da, ho, mi
+  double precision,  dimension(NSOURCES) :: sec, t_shift, hdur, lat, long, depth
+  double precision,  dimension(NSOURCES) :: mrr, mtt, mpp, mrt, mrp, mtp 
+  integer :: event_name_length, datasource_length
+  character(len=16):: event_name
+  character(len=:), allocatable  :: datasource  ! F03 feature
+
+  ! values from STATIONS ----------------------------------
+  integer :: NSTATIONS
+  integer :: station_name_length, network_name_length ! for later reading
+  character(len=:),   allocatable :: station_name, network_name 
+  double precision, allocatable, dimension(:) :: stlat, stlon, stele, stbur
+
+  character(len=150) :: OUTPUT_FILES,LOCAL_PATH,LOCAL_TMP_PATH,MODEL
+
+  ! Adios variables
+  integer                 :: adios_err
+  integer(kind=8)         :: adios_group, adios_handle, varid
+  integer(kind=8)         :: adios_groupsize, adios_totalsize
+  ! TODO: find a better name once the use of ADIOS is more completely
+  !       implemented
+  character(len=256):: filename = "OUTPUT_FILES/header_specfem3d_globe.bp" 
+  integer(kind=8)   :: group_size_inc
+  integer           :: model_length ! for later reading of MODEL
+  integer           :: isource, irec, ier
+
+
+  ! only the master needs to read the values to be written 
+  if(myrank == 0) then
+    call adios_declare_group (adios_group, "SPECFEM3D_GLOBE_HEADER",           &
+                              "", 0, adios_err)
+    call adios_select_method (adios_group, "MPI", "", "", adios_err)
+
+    group_size_inc = 0 ! Adios group size. Incremented by adios_helpers
+
+    !-- *** Define variables used to configure specfem
+    call define_solver_info_variables (adios_group, group_size_inc)
+
+    !--*** Values read from DATA/Par_file ***
+    ! extract all unmodified values from the Par_file
+    call read_parameter_file(OUTPUT_FILES,                                     & 
+        LOCAL_PATH, LOCAL_TMP_PATH, MODEL,                                     & 
+        NTSTEP_BETWEEN_OUTPUT_SEISMOS, NTSTEP_BETWEEN_READ_ADJSRC,             &
+        NTSTEP_BETWEEN_FRAMES, NTSTEP_BETWEEN_OUTPUT_INFO, NUMBER_OF_RUNS,     & 
+        NUMBER_OF_THIS_RUN, NCHUNKS, SIMULATION_TYPE, MOVIE_VOLUME_TYPE,       &
+        MOVIE_START, MOVIE_STOP, NEX_XI, NEX_ETA, NPROC_XI, NPROC_ETA,         & 
+        ANGULAR_WIDTH_XI_IN_DEGREES, ANGULAR_WIDTH_ETA_IN_DEGREES,             & 
+        CENTER_LONGITUDE_IN_DEGREES, CENTER_LATITUDE_IN_DEGREES,               &
+        GAMMA_ROTATION_AZIMUTH, HDUR_MOVIE, MOVIE_TOP_KM, MOVIE_BOTTOM_KM,     &
+        RECORD_LENGTH_IN_MINUTES, MOVIE_EAST_DEG, MOVIE_WEST_DEG,              &
+        MOVIE_NORTH_DEG, MOVIE_SOUTH_DEG, ELLIPTICITY, GRAVITY, ROTATION,      &
+        TOPOGRAPHY, OCEANS, MOVIE_SURFACE, MOVIE_VOLUME, MOVIE_COARSE,         & 
+        RECEIVERS_CAN_BE_BURIED, PRINT_SOURCE_TIME_FUNCTION, SAVE_MESH_FILES,  &
+        ATTENUATION, ATTENUATION_NEW, ABSORBING_CONDITIONS, SAVE_FORWARD,      & 
+        OUTPUT_SEISMOS_ASCII_TEXT, OUTPUT_SEISMOS_SAC_ALPHANUM,                &
+        OUTPUT_SEISMOS_SAC_BINARY, ROTATE_SEISMOGRAMS_RT,                      &
+        WRITE_SEISMOGRAMS_BY_MASTER, SAVE_ALL_SEISMOS_IN_ONE_FILE,             &
+        USE_BINARY_FOR_LARGE_FILE, NOISE_TOMOGRAPHY)
+
+    model_length = len(MODEL)
+    ! define adios variables for the Par_file
+    call define_par_file_variables (adios_group, group_size_inc, model_length)
+
+    !--*** Values read from DATA/CMTSOLUTION ***--
+    call read_raw_cmtsolution (yr, mo, da, ho, mi, sec, t_shift, hdur, lat,    &
+        long, depth, mrr, mtt, mpp, mrt, mrp, mtp, event_name_length,          &
+        event_name, datasource_length,  datasource)
+    call define_cmtsolution_variables (adios_group, group_size_inc, NSOURCES,  &
+        event_name_length, datasource_length)
+
+    !--*** Values read from DATA/STATIONS
+    call read_raw_stations (NSTATIONS, stlat, stlon, stele, stbur,             &
+        station_name_length, station_name, network_name_length, network_name)
+    call define_stations_variables (adios_group, group_size_inc, NSTATIONS,    &
+        station_name_length, network_name_length)
+
+    ! open the file where the headers have to be written
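+    ! (only rank 0 writes this header, which is why adios_open below is
+    !  given MPI_COMM_SELF rather than the global communicator)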
+    call adios_open (adios_handle, "SPECFEM3D_GLOBE_HEADER", filename, "w",    &
+                     MPI_COMM_SELF, adios_err);
+    ! The group size has been auto-incremented
+    adios_groupsize = group_size_inc
+    call adios_group_size (adios_handle, adios_groupsize,                      &
+                           adios_totalsize, adios_err)
+
+    ! Write variables from 'config.h'
+    call write_adios_solver_info_variables (adios_handle)
+
+    ! Write variables from 'Par_file'
+    call write_adios_par_file_variables (adios_handle,                         &
+        ANGULAR_WIDTH_XI_IN_DEGREES, ANGULAR_WIDTH_ETA_IN_DEGREES,             &
+        CENTER_LONGITUDE_IN_DEGREES, CENTER_LATITUDE_IN_DEGREES,               &
+        GAMMA_ROTATION_AZIMUTH, HDUR_MOVIE, MOVIE_TOP_KM, MOVIE_BOTTOM_KM,     &
+        MOVIE_EAST_DEG, MOVIE_WEST_DEG, MOVIE_NORTH_DEG, MOVIE_SOUTH_DEG,      &
+        RECORD_LENGTH_IN_MINUTES, NTSTEP_BETWEEN_OUTPUT_SEISMOS,               &
+        NTSTEP_BETWEEN_READ_ADJSRC, NTSTEP_BETWEEN_FRAMES,                     &
+        NTSTEP_BETWEEN_OUTPUT_INFO, NUMBER_OF_RUNS, NUMBER_OF_THIS_RUN,NCHUNKS,&
+        SIMULATION_TYPE, MOVIE_VOLUME_TYPE, MOVIE_START, MOVIE_STOP, NEX_XI,   &
+        NEX_ETA, NPROC_XI, NPROC_ETA, NOISE_TOMOGRAPHY, ELLIPTICITY, GRAVITY,  &
+        ROTATION, TOPOGRAPHY, OCEANS, MOVIE_SURFACE, MOVIE_VOLUME,MOVIE_COARSE,&
+        RECEIVERS_CAN_BE_BURIED, PRINT_SOURCE_TIME_FUNCTION, SAVE_MESH_FILES,  &
+        ATTENUATION, ATTENUATION_NEW, ABSORBING_CONDITIONS, SAVE_FORWARD,      &
+        OUTPUT_SEISMOS_ASCII_TEXT, OUTPUT_SEISMOS_SAC_ALPHANUM,                &
+        OUTPUT_SEISMOS_SAC_BINARY, ROTATE_SEISMOGRAMS_RT,                      &
+        WRITE_SEISMOGRAMS_BY_MASTER, SAVE_ALL_SEISMOS_IN_ONE_FILE,             &
+        USE_BINARY_FOR_LARGE_FILE, model_length, MODEL)
+
+    ! Write variables from 'CMTSOLUTION'
+    call write_adios_cmtsolution_variables (adios_handle,                      &
+        NSOURCES, yr, mo, da, ho, mi, sec, t_shift, hdur, lat, long, depth,    &
+        mrr, mtt, mpp, mrt, mrp, mtp, event_name_length, event_name,           &
+        datasource_length, datasource)
+
+    ! Write variables from 'STATIONS' 
+    call write_adios_stations_variables (adios_handle,                         &
+       NSTATIONS, stlat, stlon, stele, stbur, station_name_length,             &
+       station_name, network_name_length, network_name)
+
+    call adios_close (adios_handle, adios_err)
+
+    deallocate(datasource)
+    deallocate(station_name)
+    deallocate(network_name)
+    deallocate(stlat)
+    deallocate(stlon)
+    deallocate(stele)
+    deallocate(stbur)
+  endif
+
+! Contained ("imbricated") subroutines. The initial thought was to write a
+! module with public access to the write_specfem_header_adios routine and
+! private access to the other routines. The problem is that the file
+! compilation order would then have to be handled very carefully, which
+! requires modifications to the Makefile, and the Makefile is not currently
+! designed for that.
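+! A minimal sketch of that module layout, kept here as a comment only (the
+! module name is hypothetical and not part of this commit):
+!   module adios_header_mod
+!     implicit none
+!     private
+!     public :: write_specfem_header_adios
+!   contains
+!     subroutine write_specfem_header_adios()
+!       ! the define_*, read_raw_* and write_adios_* helpers below would
+!       ! become private module procedures instead of contained routines
+!     end subroutine write_specfem_header_adios
+!   end module adios_header_mod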
+contains
+
+!> \brief Define ADIOS variables to store values from 'setup/config.h'. Store
+!!        configuration parameters to ensure reproducibility
+!! \param adios_group The ADIOS entity grouping variables for data transfers
+!! \param group_size_inc The group size, incremented by the size of each
+!!                       defined variable
+subroutine define_solver_info_variables (adios_group, group_size_inc)
+  implicit none
+  ! Parameters
+  integer(kind=8), intent(in)    :: adios_group
+  integer(kind=8), intent(inout) :: group_size_inc
+  ! Variables
+  integer :: pkg_str_len, conf_flags_len
+ 
+  pkg_str_len    = len_trim(PACKAGE_STRING)
+  conf_flags_len = len_trim(CONFIGURE_FLAGS)
+  call define_adios_integer_scalar (adios_group, "package_string_length", &
+      "/solver_info", group_size_inc)
+  call define_adios_string (adios_group, "package_name", "/solver_info", &
+      pkg_str_len, group_size_inc)
+  call define_adios_integer_scalar (adios_group, "conf_flags_len", &
+      "/solver_info", group_size_inc)
+  call define_adios_string (adios_group, "conf_flags", "/solver_info", &
+      conf_flags_len, group_size_inc)
+end subroutine define_solver_info_variables
+
+!> \brief Define ADIOS variables to store values from the Par_file
+!! \param adios_group The ADIOS entity grouping variables for data transfers
+!! \param group_size_inc The group size, incremented by the size of each
+!!                       defined variable
+!! \param model_length The number of characters in the MODEL string.
+!!                     Useful for reading back the MODEL
+subroutine define_par_file_variables (adios_group, group_size_inc, model_length)
+  implicit none
+  ! Parameters
+  integer(kind=8), intent(in)    :: adios_group
+  integer(kind=8), intent(inout) :: group_size_inc
+  integer, intent(in)            :: model_length ! for later reading of MODEL
+
+  !-- double precision variables
+  call define_adios_double_scalar (adios_group, "ANGULAR_WIDTH_XI_IN_DEGREES", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "ANGULAR_WIDTH_ETA_IN_DEGREES", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "CENTER_LONGITUDE_IN_DEGREES", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "CENTER_LATITUDE_IN_DEGREES", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "GAMMA_ROTATION_AZIMUTH", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "HDUR_MOVIE", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "MOVIE_TOP_KM", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "MOVIE_BOTTOM_KM", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "MOVIE_EAST_DEG", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "MOVIE_WEST_DEG", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "MOVIE_NORTH_DEG", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "MOVIE_SOUTH_DEG", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_double_scalar (adios_group, "RECORD_LENGTH_IN_MINUTES", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  !-- integer variables 
+  call define_adios_integer_scalar (adios_group, "NTSTEP_BETWEEN_OUTPUT_SEISMOS", &
+      "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NTSTEP_BETWEEN_READ_ADJSRC", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NTSTEP_BETWEEN_FRAMES", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NTSTEP_BETWEEN_OUTPUT_INFO", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NUMBER_OF_RUNS", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NUMBER_OF_THIS_RUN", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NCHUNKS", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "SIMULATION_TYPE", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "MOVIE_VOLUME_TYPE", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "MOVIE_START", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "MOVIE_STOP", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NEX_XI", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NEX_ETA", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NPROC_XI", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NPROC_ETA", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "NOISE_TOMOGRAPHY", "/specfem3D_globe_parameter_file", group_size_inc)
+  !-- logical variables
+  call define_adios_byte_scalar (adios_group, "ELLIPTICITY", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "GRAVITY", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "ROTATION", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "TOPOGRAPHY", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "OCEANS", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "MOVIE_SURFACE", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "MOVIE_VOLUME", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "MOVIE_COARSE", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "RECEIVERS_CAN_BE_BURIED", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "PRINT_SOURCE_TIME_FUNCTION", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "SAVE_MESH_FILES", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "ATTENUATION", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "ATTENUATION_NEW", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "ABSORBING_CONDITIONS", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "SAVE_FORWARD", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "OUTPUT_SEISMOS_ASCII_TEXT", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "OUTPUT_SEISMOS_SAC_ALPHANUM", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "OUTPUT_SEISMOS_SAC_BINARY", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "ROTATE_SEISMOGRAMS_RT", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "WRITE_SEISMOGRAMS_BY_MASTER", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "SAVE_ALL_SEISMOS_IN_ONE_FILE", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_byte_scalar (adios_group, "USE_BINARY_FOR_LARGE_FILE", "/specfem3D_globe_parameter_file", group_size_inc)
+  !-- string variables
+  call define_adios_integer_scalar (adios_group, "model_length", "/specfem3D_globe_parameter_file", group_size_inc)
+  call define_adios_string (adios_group, "MODEL", "/specfem3D_globe_parameter_file", model_length, group_size_inc)
+end subroutine define_par_file_variables
+
+
+!> \brief Define ADIOS variables to store values from the CMTSOLUTION file
+!! \param adios_group The ADIOS entity grouping variables for data transfers
+!! \param group_size_inc The group size, incremented by the size of each
+!!                       defined variable
+!! \param NSOURCES The number of sources. Needed to define array sizes.
+!! \param event_name_length The number of characters in the event_name string.
+!!                          Useful for reading back the event name.
+!! \param datasource_length The number of characters in the datasource string.
+!!                          Useful for reading back the datasources.
+subroutine define_cmtsolution_variables (adios_group, group_size_inc, NSOURCES,&
+    event_name_length, datasource_length)
+  implicit none
+  integer(kind=8), intent(in)    :: adios_group
+  integer(kind=8), intent(inout) :: group_size_inc
+  integer, intent(in) :: NSOURCES, datasource_length, event_name_length
+
+  !-- Number of SOURCES inside the CMTSOLUTION file
+  call define_adios_integer_scalar (adios_group, "NSOURCES", "/CMTSOLUTION", group_size_inc)
+  !-- double precision arrays
+  call define_adios_double_local_array1D (adios_group, "second", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "time_shift", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "half_duration", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "latitude", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "longitude", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "depth", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "mrr", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "mtt", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "mpp", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "mrt", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "mrp", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "mtp", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  !-- integer arrays
+  call define_adios_integer_local_array1D (adios_group, "year", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_integer_local_array1D (adios_group, "month", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_integer_local_array1D (adios_group, "day", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_integer_local_array1D (adios_group, "hour", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  call define_adios_integer_local_array1D (adios_group, "minute", "/CMTSOLUTION", NSOURCES, "NSOURCES", group_size_inc)
+  !-- string
+  call define_adios_integer_scalar (adios_group, "event_name_length", "/CMTSOLUTION", group_size_inc)
+  call define_adios_string (adios_group, "event_name", "/CMTSOLUTION", event_name_length, group_size_inc)
+  call define_adios_integer_scalar (adios_group, "datasource_length", "/CMTSOLUTION", group_size_inc)
+  call define_adios_string (adios_group, "datasource", "/CMTSOLUTION", datasource_length, group_size_inc)
+end subroutine define_cmtsolution_variables
+
+!> \brief Define ADIOS variables to store values from the STATIONS file
+!! \param adios_group The ADIOS entity grouping variables for data transfers
+!! \param group_size_inc The group size, incremented by the size of each
+!!                       defined variable
+!! \param NSTATIONS The number of stations. Needed to define array sizes.
+!! \param station_name_length The number of characters in the station_name
+!!                            string. Useful for reading back the stations.
+!! \param network_name_length The number of characters in the network_name
+!!                            string. Useful for reading back the networks.
+subroutine define_stations_variables (adios_group, group_size_inc, NSTATIONS,&
+    station_name_length, network_name_length)
+  implicit none
+  integer(kind=8), intent(in)    :: adios_group
+  integer(kind=8), intent(inout) :: group_size_inc
+  integer, intent(in) :: NSTATIONS, station_name_length, network_name_length
+
+  !-- Number of STATIONS inside the STATIONS file 
+  call define_adios_integer_scalar (adios_group, "NSTATIONS", "/STATIONS", group_size_inc)
+  !-- double precision arrays 
+  call define_adios_double_local_array1D (adios_group, "station_latitude", "/STATIONS", NSTATIONS, "NSTATIONS", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "station_longitude", "/STATIONS", NSTATIONS, "NSTATIONS", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "station_elevation", "/STATIONS", NSTATIONS, "NSTATIONS", group_size_inc)
+  call define_adios_double_local_array1D (adios_group, "station_burial", "/STATIONS", NSTATIONS, "NSTATIONS", group_size_inc)
+  !-- string
+  call define_adios_integer_scalar (adios_group, "station_name_length", "/STATIONS", group_size_inc)
+  call define_adios_integer_scalar (adios_group, "network_name_length", "/STATIONS", group_size_inc)
+  call define_adios_string (adios_group, "station_name", "/STATIONS", station_name_length, group_size_inc)
+  call define_adios_string (adios_group, "network_name", "/STATIONS", network_name_length, group_size_inc)
+end subroutine define_stations_variables
+
+!> \brief Read the 'CMTSOLUTION' file without modifying or transforming values
+!! \param yr Array to store the year of the events
+!! \param mo Array to store the month of the events
+!! \param da Array to store the day of the events
+!! \param ho Array to store the hour of the events
+!! \param mi Array to store the minute of the events
+!! \param sec Array to store the second of the events
+!! \param t_shift Array to store the time shift at the beginning of the events
+!! \param hdur Array to store the duration of the events
+!! \param lat Array to store the latitude of the events
+!! \param long Array to store the longitude of the events
+!! \param depth Arrays to store the depth of the events
+!! \param mrr Arrays to store the mrr component of the events
+!! \param mtt Arrays to store the mtt component of the events
+!! \param mpp Arrays to store the mpp component of the events
+!! \param mrt Arrays to store the mrt component of the events
+!! \param mrp Arrays to store the mrp component of the events
+!! \param mtp Arrays to store the mtp component of the events
+!! \param event_name_length Variable for keeping the size of the event_name
+!!                          string
+!! \param event_name Strings to store the event name
+!! \param  datasource_length Variable for keeping the size of the datasource 
+!!                          string
+!! \param datasource String in which the different datasource names are
+!!                   concatenated
+!> \note This subroutine and get_cmt.f90 are redundant. They might be factored
+!!       together in the future. For now we do not want the value modifications
+!!       performed by get_cmt
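+!> \note A minimal sketch of the first CMTSOLUTION line parsed below with the
+!!       format "(a4,i5,i3,i3,i3,i3,f6.2)": a 4-character datasource tag
+!!       followed by year, month, day, hour, minute and seconds, for instance
+!!       " PDE 2011  3 11  5 46 23.20" (these values are purely illustrative)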
+subroutine read_raw_cmtsolution (yr, mo, da, ho, mi, sec, t_shift, hdur, lat,  &
+    long, depth, mrr, mtt, mpp, mrt, mrp, mtp, event_name_length, event_name,  &
+    datasource_length, datasource)
+  implicit none
+  ! Parameters
+  integer,           dimension(NSOURCES), intent(out) :: yr, mo, da, ho, mi
+  double precision,  dimension(NSOURCES), intent(out) :: sec, t_shift, hdur, lat, long, depth
+  double precision,  dimension(NSOURCES), intent(out) :: mrr, mtt, mpp, mrt, mrp, mtp 
+  integer, intent(inout) :: event_name_length, datasource_length
+  character(len=16), intent(out) :: event_name
+  character(len=:), allocatable, intent(out)  :: datasource  ! F03 feature
+  ! Local variables
+  character(len=5)   :: datasource_tmp
+  character(len=256) :: CMTSOLUTION, string
+  ! extract all unmodified values from CMTSOLUTION
+  ! (the get_cmt() routine modifies the values it reads)
+  ! TODO: factor what follows together with get_cmt.f90 and probably one or
+  !       two other routines
+  call get_value_string(CMTSOLUTION, 'solver.CMTSOLUTION', 'DATA/CMTSOLUTION')
+  open(unit=1,file=CMTSOLUTION,status='old',action='read')
+  datasource_length = 4*NSOURCES ! a datasource is 4 characters, by convention
+  allocate(character(len=(datasource_length)) :: datasource, stat=ier)
+  if (ier /=0) &
+      call exit_MPI (myrank, &
+          "error allocating datasource string for adios header")
+  datasource = ""
+  ! ADIOS reserves only one (1) byte for a string, which may cause data to be
+  ! overwritten => increase the group size by the string length minus 1
+  adios_groupsize = adios_groupsize + 4*NSOURCES - 1
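+  ! (e.g. for NSOURCES = 2 the concatenated datasource holds 2*4 = 8
+  !  characters, of which only 1 is accounted for, hence add 8 - 1 = 7)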
+  do isource=1,NSOURCES
+
+    read(1,"(a256)") string
+    ! skips empty lines
+    do while( len_trim(string) == 0 )
+    read(1,"(a256)") string
+    enddo
+    ! read header with event information
+    read(string,"(a4,i5,i3,i3,i3,i3,f6.2)") datasource_tmp,yr(isource), &
+        mo(isource),da(isource),ho(isource),mi(isource),sec(isource)
+    datasource = datasource // datasource_tmp
+    ! read event name 
+    read(1,"(a)") string
+    read(string(12:len_trim(string)),*) event_name
+    ! read time shift
+    read(1,"(a)") string
+    read(string(12:len_trim(string)),*) t_shift(isource)
+    ! read half duration
+    read(1,"(a)") string
+    read(string(15:len_trim(string)),*) hdur(isource)
+    ! read latitude
+    read(1,"(a)") string
+    read(string(10:len_trim(string)),*) lat(isource)
+    ! read longitude
+    read(1,"(a)") string
+    read(string(11:len_trim(string)),*) long(isource)
+    ! read depth
+    read(1,"(a)") string
+    read(string(7:len_trim(string)),*) depth(isource)
+    ! read Mrr
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) mrr(isource)
+    ! read Mtt
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) mtt(isource)
+    ! read Mpp
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) mpp(isource)
+    ! read Mrt
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) mrt(isource)
+    ! read Mrp
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) mrp(isource)
+    ! read Mtp
+    read(1,"(a)") string
+    read(string(5:len_trim(string)),*) mtp(isource)
+  enddo
+  close(1)
+  event_name_length = len_trim(event_name)
+end subroutine read_raw_cmtsolution
+
+!> \brief Reads information from the 'STATIONS' file without modifying anything
+!! \param NSTATIONS How many stations are used
+!! \param stlat Array to store the latitude of the stations
+!! \param stlon Array to store the longitude of the stations
+!! \param stele Array to store the elevation of the stations
+!! \param stbur Array to store the burial of the stations
+!! \param station_name_length Variable to keep the length of the station_name
+!!                            string
+!! \param station_name  String in which the different station names are
+!!                      concatenated
+!! \param network_name_length Variable to keep the length of the network_name
+!!                            string
+!! \param network_name String in which the different network names are
+!!                     concatenated
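+!> \note Each name is appended followed by a single blank and the final string
+!!       is trimmed, e.g. station_name = "ANMO CCM HRV" for three stations
+!!       (these station codes are purely illustrative)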
+subroutine read_raw_stations (NSTATIONS, stlat, stlon, stele, stbur,           &
+    station_name_length, station_name, network_name_length, network_name)
+  implicit none
+  ! Parameters
+  integer :: NSTATIONS
+  integer, intent(inout) :: station_name_length, network_name_length ! for later reading
+  character(len=:), allocatable, intent(out) :: station_name, network_name 
+  double precision, allocatable, dimension(:), intent(out) :: stlat, stlon, stele, stbur
+  ! Local variables
+  character(len=MAX_LENGTH_STATION_NAME) :: station_name_tmp
+  character(len=MAX_LENGTH_NETWORK_NAME) :: network_name_tmp
+  character(len=256) :: STATIONS, string
+
+  ! Extract values from STATIONS File
+  call get_value_string(STATIONS, 'solver.STATIONS', 'DATA/STATIONS')
+  open(unit=1,file=STATIONS,iostat=ier,status='old',action='read')
+  NSTATIONS = 0
+  do while(ier == 0)
+    read(1,"(a)",iostat=ier) string
+    if(ier == 0) NSTATIONS = NSTATIONS + 1
+  enddo
+  allocate (character (len=(MAX_LENGTH_STATION_NAME*NSTATIONS)) :: station_name)
+  allocate (character (len=(MAX_LENGTH_NETWORK_NAME*NSTATIONS)) :: network_name)
+  allocate (stlat (NSTATIONS)) 
+  allocate (stlon (NSTATIONS)) 
+  allocate (stele (NSTATIONS)) 
+  allocate (stbur (NSTATIONS)) 
+  station_name = ""
+  network_name = ""
+  rewind(1)
+  do irec = 1,NSTATIONS
+    read(1,*,iostat=ier) station_name_tmp, network_name_tmp, &
+                         stlat(irec), stlon(irec), stele(irec), stbur(irec)
+    if( ier /= 0 ) then
+      write(IMAIN,*) 'error reading in station ',irec
+      call exit_MPI(myrank,'error reading in station in STATIONS file')
+    endif
+    station_name = station_name // trim(station_name_tmp) // " "
+    network_name = network_name // trim(network_name_tmp) // " "
+  enddo
+  close(1)
+  station_name = trim(station_name)
+  network_name = trim(network_name)
+  station_name_length = len(station_name)
+  network_name_length = len(network_name)
+end subroutine read_raw_stations
+
+!> \brief Wrapper to write the 'config.h' variables into the adios header
+!! \param adios_handle The handle to the file where the variable should be
+!!                     written
+subroutine write_adios_solver_info_variables (adios_handle)
+  implicit none
+  ! Parameters
+  integer(kind=8), intent(in)    :: adios_handle
+  ! Variables
+  integer :: pkg_str_len, conf_flags_len, adios_err
+  character(len=:), allocatable :: pkg_str
+  character(len=:), allocatable :: conf_flags
+ 
+  pkg_str    = trim(PACKAGE_STRING)
+  conf_flags = trim(CONFIGURE_FLAGS)
+
+  pkg_str_len    = len_trim(PACKAGE_STRING)
+  conf_flags_len = len_trim(CONFIGURE_FLAGS)
+  call adios_write (adios_handle, "package_string_length", pkg_str_len, adios_err)
+  call adios_write (adios_handle, "package_name", pkg_str, adios_err)
+  call adios_write (adios_handle, "conf_flags_len", conf_flags_len, adios_err)
+  call adios_write (adios_handle, "conf_flags", conf_flags, adios_err)
+end subroutine write_adios_solver_info_variables 
+
+!> \brief Wrapper to write the 'Par_file' variables into the adios header
+!! \param adios_handle The handle to the file where the variable should be
+!!                     written
+subroutine write_adios_par_file_variables (adios_handle,                       &
+    ANGULAR_WIDTH_XI_IN_DEGREES, ANGULAR_WIDTH_ETA_IN_DEGREES,                 &
+    CENTER_LONGITUDE_IN_DEGREES, CENTER_LATITUDE_IN_DEGREES,                   &
+    GAMMA_ROTATION_AZIMUTH, HDUR_MOVIE, MOVIE_TOP_KM, MOVIE_BOTTOM_KM,         &
+    MOVIE_EAST_DEG, MOVIE_WEST_DEG, MOVIE_NORTH_DEG, MOVIE_SOUTH_DEG,          &
+    RECORD_LENGTH_IN_MINUTES, NTSTEP_BETWEEN_OUTPUT_SEISMOS,                   &
+    NTSTEP_BETWEEN_READ_ADJSRC, NTSTEP_BETWEEN_FRAMES,                         &
+    NTSTEP_BETWEEN_OUTPUT_INFO, NUMBER_OF_RUNS, NUMBER_OF_THIS_RUN, NCHUNKS,   &
+    SIMULATION_TYPE, MOVIE_VOLUME_TYPE, MOVIE_START, MOVIE_STOP, NEX_XI,       &
+    NEX_ETA, NPROC_XI, NPROC_ETA, NOISE_TOMOGRAPHY, ELLIPTICITY, GRAVITY,      &
+    ROTATION, TOPOGRAPHY, OCEANS, MOVIE_SURFACE, MOVIE_VOLUME, MOVIE_COARSE,   &
+    RECEIVERS_CAN_BE_BURIED, PRINT_SOURCE_TIME_FUNCTION, SAVE_MESH_FILES,      &
+    ATTENUATION, ATTENUATION_NEW, ABSORBING_CONDITIONS, SAVE_FORWARD,          &
+    OUTPUT_SEISMOS_ASCII_TEXT, OUTPUT_SEISMOS_SAC_ALPHANUM,                    &
+    OUTPUT_SEISMOS_SAC_BINARY, ROTATE_SEISMOGRAMS_RT,                          &
+    WRITE_SEISMOGRAMS_BY_MASTER, SAVE_ALL_SEISMOS_IN_ONE_FILE,                 &
+    USE_BINARY_FOR_LARGE_FILE, model_length, MODEL)
+  implicit none
+  ! Parameters
+  integer(kind=8), intent(in) :: adios_handle
+  integer, intent(in)  :: NTSTEP_BETWEEN_OUTPUT_SEISMOS,                       &
+      NTSTEP_BETWEEN_READ_ADJSRC, NTSTEP_BETWEEN_FRAMES,                       &
+      NTSTEP_BETWEEN_OUTPUT_INFO,NUMBER_OF_RUNS, NUMBER_OF_THIS_RUN,NCHUNKS,   &
+      SIMULATION_TYPE, MOVIE_VOLUME_TYPE, MOVIE_START,MOVIE_STOP, NEX_XI,      &
+      NEX_ETA,NPROC_XI,NPROC_ETA, NOISE_TOMOGRAPHY
+  double precision, intent(in) :: ANGULAR_WIDTH_XI_IN_DEGREES,                 &
+      ANGULAR_WIDTH_ETA_IN_DEGREES, CENTER_LONGITUDE_IN_DEGREES,               &
+      CENTER_LATITUDE_IN_DEGREES, GAMMA_ROTATION_AZIMUTH, HDUR_MOVIE,          &
+      MOVIE_TOP_KM,MOVIE_BOTTOM_KM, MOVIE_EAST_DEG,MOVIE_WEST_DEG,             &
+      MOVIE_NORTH_DEG,MOVIE_SOUTH_DEG, RECORD_LENGTH_IN_MINUTES
+  logical, intent(in) :: ELLIPTICITY,GRAVITY,ROTATION,TOPOGRAPHY,OCEANS,       &
+      MOVIE_SURFACE, MOVIE_VOLUME,MOVIE_COARSE, RECEIVERS_CAN_BE_BURIED,       &
+      PRINT_SOURCE_TIME_FUNCTION, SAVE_MESH_FILES,ATTENUATION,ATTENUATION_NEW, &
+      ABSORBING_CONDITIONS,SAVE_FORWARD, OUTPUT_SEISMOS_ASCII_TEXT,            &
+      OUTPUT_SEISMOS_SAC_ALPHANUM,OUTPUT_SEISMOS_SAC_BINARY,                   &
+      ROTATE_SEISMOGRAMS_RT,WRITE_SEISMOGRAMS_BY_MASTER,                       &
+      SAVE_ALL_SEISMOS_IN_ONE_FILE,USE_BINARY_FOR_LARGE_FILE
+  integer, intent(in) :: model_length
+  character(len=*), intent(in) :: MODEL
+  ! Local variables
+  integer  :: adios_err
+
+  call adios_write (adios_handle, "ANGULAR_WIDTH_XI_IN_DEGREES", ANGULAR_WIDTH_XI_IN_DEGREES, adios_err)
+  call adios_write (adios_handle, "ANGULAR_WIDTH_ETA_IN_DEGREES", ANGULAR_WIDTH_ETA_IN_DEGREES, adios_err)
+  call adios_write (adios_handle, "CENTER_LONGITUDE_IN_DEGREES", CENTER_LONGITUDE_IN_DEGREES, adios_err)
+  call adios_write (adios_handle, "CENTER_LATITUDE_IN_DEGREES", CENTER_LATITUDE_IN_DEGREES, adios_err)
+  call adios_write (adios_handle, "GAMMA_ROTATION_AZIMUTH", GAMMA_ROTATION_AZIMUTH, adios_err)
+  call adios_write (adios_handle, "HDUR_MOVIE", HDUR_MOVIE, adios_err)
+  call adios_write (adios_handle, "MOVIE_TOP_KM", MOVIE_TOP_KM, adios_err)
+  call adios_write (adios_handle, "MOVIE_BOTTOM_KM", MOVIE_BOTTOM_KM, adios_err)
+  call adios_write (adios_handle, "MOVIE_EAST_DEG", MOVIE_EAST_DEG, adios_err)
+  call adios_write (adios_handle, "MOVIE_WEST_DEG", MOVIE_WEST_DEG, adios_err)
+  call adios_write (adios_handle, "MOVIE_NORTH_DEG", MOVIE_NORTH_DEG, adios_err)
+  call adios_write (adios_handle, "MOVIE_SOUTH_DEG", MOVIE_SOUTH_DEG, adios_err)
+  call adios_write (adios_handle, "RECORD_LENGTH_IN_MINUTES", RECORD_LENGTH_IN_MINUTES, adios_err)
+  call adios_write (adios_handle, "NTSTEP_BETWEEN_OUTPUT_SEISMOS", NTSTEP_BETWEEN_OUTPUT_SEISMOS, adios_err)
+  call adios_write (adios_handle, "NTSTEP_BETWEEN_READ_ADJSRC", NTSTEP_BETWEEN_READ_ADJSRC, adios_err)
+  call adios_write (adios_handle, "NTSTEP_BETWEEN_FRAMES", NTSTEP_BETWEEN_FRAMES, adios_err)
+  call adios_write (adios_handle, "NTSTEP_BETWEEN_OUTPUT_INFO", NTSTEP_BETWEEN_OUTPUT_INFO, adios_err)
+  call adios_write (adios_handle, "NUMBER_OF_RUNS", NUMBER_OF_RUNS, adios_err)
+  call adios_write (adios_handle, "NUMBER_OF_THIS_RUN", NUMBER_OF_THIS_RUN, adios_err)
+  call adios_write (adios_handle, "NCHUNKS", NCHUNKS, adios_err)
+  call adios_write (adios_handle, "SIMULATION_TYPE", SIMULATION_TYPE, adios_err)
+  call adios_write (adios_handle, "MOVIE_VOLUME_TYPE", MOVIE_VOLUME_TYPE, adios_err)
+  call adios_write (adios_handle, "MOVIE_START", MOVIE_START, adios_err)
+  call adios_write (adios_handle, "MOVIE_STOP", MOVIE_STOP, adios_err)
+  call adios_write (adios_handle, "NEX_XI", NEX_XI, adios_err)
+  call adios_write (adios_handle, "NEX_ETA", NEX_ETA, adios_err)
+  call adios_write (adios_handle, "NPROC_XI", NPROC_XI, adios_err)
+  call adios_write (adios_handle, "NPROC_ETA", NPROC_ETA, adios_err)
+  call adios_write (adios_handle, "NOISE_TOMOGRAPHY", NOISE_TOMOGRAPHY, adios_err)
+  call adios_write (adios_handle, "ELLIPTICITY", ELLIPTICITY, adios_err)
+  call adios_write (adios_handle, "GRAVITY", GRAVITY, adios_err)
+  call adios_write (adios_handle, "ROTATION", ROTATION, adios_err)
+  call adios_write (adios_handle, "TOPOGRAPHY", TOPOGRAPHY, adios_err)
+  call adios_write (adios_handle, "OCEANS", OCEANS, adios_err)
+  call adios_write (adios_handle, "MOVIE_SURFACE", MOVIE_SURFACE, adios_err)
+  call adios_write (adios_handle, "MOVIE_VOLUME", MOVIE_VOLUME, adios_err)
+  call adios_write (adios_handle, "MOVIE_COARSE", MOVIE_COARSE, adios_err)
+  call adios_write (adios_handle, "RECEIVERS_CAN_BE_BURIED", RECEIVERS_CAN_BE_BURIED, adios_err)
+  call adios_write (adios_handle, "PRINT_SOURCE_TIME_FUNCTION", PRINT_SOURCE_TIME_FUNCTION, adios_err)
+  call adios_write (adios_handle, "SAVE_MESH_FILES", SAVE_MESH_FILES, adios_err)
+  call adios_write (adios_handle, "ATTENUATION", ATTENUATION, adios_err)
+  call adios_write (adios_handle, "ATTENUATION_NEW", ATTENUATION_NEW, adios_err)
+  call adios_write (adios_handle, "ABSORBING_CONDITIONS", ABSORBING_CONDITIONS, adios_err)
+  call adios_write (adios_handle, "SAVE_FORWARD", SAVE_FORWARD, adios_err)
+  call adios_write (adios_handle, "OUTPUT_SEISMOS_ASCII_TEXT", OUTPUT_SEISMOS_ASCII_TEXT, adios_err)
+  call adios_write (adios_handle, "OUTPUT_SEISMOS_SAC_ALPHANUM", OUTPUT_SEISMOS_SAC_ALPHANUM, adios_err)
+  call adios_write (adios_handle, "OUTPUT_SEISMOS_SAC_BINARY", OUTPUT_SEISMOS_SAC_BINARY, adios_err)
+  call adios_write (adios_handle, "ROTATE_SEISMOGRAMS_RT", ROTATE_SEISMOGRAMS_RT, adios_err)
+  call adios_write (adios_handle, "WRITE_SEISMOGRAMS_BY_MASTER", WRITE_SEISMOGRAMS_BY_MASTER, adios_err)
+  call adios_write (adios_handle, "SAVE_ALL_SEISMOS_IN_ONE_FILE", SAVE_ALL_SEISMOS_IN_ONE_FILE, adios_err)
+  call adios_write (adios_handle, "USE_BINARY_FOR_LARGE_FILE", USE_BINARY_FOR_LARGE_FILE, adios_err)
+  call adios_write (adios_handle, "model_length", model_length, adios_err)
+  call adios_write (adios_handle, "MODEL", MODEL, adios_err)
+end subroutine write_adios_par_file_variables 
+
+!> \brief Wrapper to write the 'CMTSOLUTION' variables into the adios header
+!! \param adios_handle The handle to the file where the variable should be
+!!                     written
+subroutine write_adios_cmtsolution_variables (adios_handle,                    &
+    NSOURCES, yr, mo, da, ho, mi, sec, t_shift, hdur, lat, long, depth,        &
+    mrr, mtt, mpp, mrt, mrp, mtp, event_name_length, event_name,               &
+    datasource_length, datasource)
+  implicit none
+  ! Parameters
+  integer(kind=8), intent(in) :: adios_handle
+  integer, intent(in) :: NSOURCES
+  integer,          dimension(NSOURCES), intent(in) :: yr, mo, da, ho, mi
+  double precision, dimension(NSOURCES), intent(in) :: sec, t_shift, hdur,     &
+                                                       lat, long, depth
+  double precision, dimension(NSOURCES), intent(in) :: mrr, mtt, mpp,          &
+                                                       mrt, mrp, mtp 
+  integer, intent(in) :: event_name_length, datasource_length
+  character(len=16), intent(in) :: event_name
+  character(len=:), allocatable, intent(in)  :: datasource  ! F03 feature
+  ! Local variables
+  integer :: adios_err
+
+  call adios_write (adios_handle, "NSOURCES", NSOURCES, adios_err)
+  call adios_write (adios_handle, "year", yr, adios_err)
+  call adios_write (adios_handle, "month", mo, adios_err)
+  call adios_write (adios_handle, "day", da, adios_err)
+  call adios_write (adios_handle, "hour", ho, adios_err)
+  call adios_write (adios_handle, "minute", mi, adios_err)
+  call adios_write (adios_handle, "second", sec, adios_err)
+  call adios_write (adios_handle, "time_shift", t_shift, adios_err)
+  call adios_write (adios_handle, "half_duration", hdur, adios_err)
+  call adios_write (adios_handle, "latitude", lat, adios_err)
+  call adios_write (adios_handle, "longitude", long, adios_err)
+  call adios_write (adios_handle, "depth", depth, adios_err)
+  call adios_write (adios_handle, "mrr", mrr, adios_err)
+  call adios_write (adios_handle, "mtt", mtt, adios_err)
+  call adios_write (adios_handle, "mpp", mpp, adios_err)
+  call adios_write (adios_handle, "mrt", mrt, adios_err)
+  call adios_write (adios_handle, "mrp", mrp, adios_err)
+  call adios_write (adios_handle, "mtp", mtp, adios_err)
+  call adios_write (adios_handle, "event_name_length", event_name_length, adios_err)
+  call adios_write (adios_handle, "event_name", event_name, adios_err)
+  call adios_write (adios_handle, "datasource_length", datasource_length, adios_err)
+  call adios_write (adios_handle, "datasource", datasource, adios_err)
+end subroutine write_adios_cmtsolution_variables
+
+!> \brief Wrapper to write the 'STATIONS' variables into the adios header
+!! \param adios_handle The handle to the file where the variable should be
+!!                     written
+subroutine write_adios_stations_variables (adios_handle,                       &
+    NSTATIONS, stlat, stlon, stele, stbur, station_name_length, station_name,  &
+    network_name_length, network_name)
+  implicit none
+  ! Parameters
+  integer(kind=8), intent(in) :: adios_handle
+  integer, intent(in):: NSTATIONS
+  integer, intent(in):: station_name_length, network_name_length ! for later reading
+  character(len=:), allocatable, intent(in) :: station_name, network_name 
+  double precision, allocatable, dimension(:), intent(in) :: stlat, stlon,     &
+                                                             stele, stbur
+  ! Local variables
+  integer :: adios_err
+
+  call adios_write (adios_handle, "NSTATIONS", NSTATIONS, adios_err)
+  call adios_write (adios_handle, "station_latitude", stlat, adios_err)
+  call adios_write (adios_handle, "station_longitude", stlon, adios_err)
+  call adios_write (adios_handle, "station_elevation", stele, adios_err)
+  call adios_write (adios_handle, "station_burial", stbur, adios_err)
+  call adios_write (adios_handle, "station_name_length", station_name_length, adios_err)
+  call adios_write (adios_handle, "network_name_length", network_name_length, adios_err)
+  call adios_write (adios_handle, "station_name", station_name, adios_err)
+  call adios_write (adios_handle, "network_name", network_name, adios_err)
+end subroutine write_adios_stations_variables
+
+
+end subroutine write_specfem_header_adios


